Setup¶
Retrieve "GL-DPPD-7111_Mmus_Brain_CellType_GeneMarkers.csv" from this GitHub link and save it in the same directory as this notebook. The GitHub path is nasa/GeneLab_Data_Processing/scRNAseq/10X_Chromium_3prime_Data/GeneLab_CellType_GeneMarkers/GL-DPPD-7111_GeneMarker_Files.
Possible Map My Cells region keys = ["RHP", "RSP", "ACA", "PL-ILA-ORB", "AUD-TEa-PERI-ECT", "SS-GU-VISC", "MO-FRP", "PAL", "sAMY", "CTXsp", "HY", "STRv", "OLF", "LSX", "AI", "STRd", "VIS-PTLp", "VIS", "TH", "MOp", "ENT", "HIP", "P", "MB", "MY", "CB", "AUD", "SSp", "TEa-PERI-ECT"]
Use Conda
While in the folder containing this notebook:
conda env create -f rapidsc.yml
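Activate the new environment (conda activate, followed by the environment name defined in rapidsc.yml), then navigate to where you want to clone the scflow repository (I recommend home):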
cd
Clone scflow from GitHub.
git clone git@github.com:easlinger/scflow.git
Navigate into the cloned scflow folder (cd scflow), then install scflow and senepy:
pip install .
pip install senepy
For NVIDIA Drivers (Linux)
sudo apt update
sudo apt install -y build-essential dkms
sudo apt install -y wget
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-ubuntu2204.pin
sudo mv cuda-ubuntu2204.pin /etc/apt/preferences.d/cuda-repository-pin-600
sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub
sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /"
sudo apt update
sudo apt install -y cuda
Imports & Display¶
%load_ext autoreload
%autoreload 2
%matplotlib inline
import os
import re
import logging
import json
try:
import torch
torch.set_float32_matmul_precision("medium")
except Exception:
pass
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import pandas as pd
import numpy as np
import scflow
# Pandas display limits used when printing data frames in this notebook
for _option, _value in (
        ("display.max_rows", 500),  # or None for unlimited rows
        ("display.max_columns", 100),
        ("display.width", 200)):
    pd.set_option(_option, _value)
class CategoricalFilter(logging.Filter):
    """Logging filter that hides "storing ... as categorical" messages.

    A record is suppressed only when its formatted message contains BOTH
    the substring "storing" and the substring "as categorical"; every other
    record passes through.
    """

    def filter(self, record):
        """Return False to drop the record, True to keep it."""
        message = record.getMessage()
        is_categorical_notice = (
            "storing" in message and "as categorical" in message)
        return not is_categorical_notice


# Attach the filter to the "anndata" logger
logger = logging.getLogger("anndata")
logger.addFilter(CategoricalFilter())
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/jaxopt/__init__.py:59: DeprecationWarning: JAXopt is no longer maintained. See https://docs.jax.dev/en/latest/ for alternatives.
warnings.warn(
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/marshmallow/__init__.py:17: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.
__version_info__ = tuple(LooseVersion(__version__).version)
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/marshmallow/fields.py:198: RemovedInMarshmallow4Warning: Passing field metadata as a keyword arg is deprecated. Use the explicit `metadata=...` argument instead.
warnings.warn(
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/pkg_resources/__init__.py:3146: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('sphinxcontrib')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
declare_namespace(pkg)
Set Options (ACTIVELY SET THESE!)¶
# Processors to Use
# Leave one core free, but never go below 1 worker: os.cpu_count() returns
# None when the core count cannot be determined (which would make the
# subtraction raise TypeError) and returns 1 on single-core machines (which
# would otherwise give 0 processors)
n_processors = max(1, (os.cpu_count() or 1) - 1)  # how many processors to use

# If You Want Results Emailed
# Path of this notebook (in the current working directory) and of the HTML
# export that shares its base name
cur_file = os.path.join(os.path.abspath(""), "create_object_613.ipynb")
html_out = os.path.splitext(cur_file)[0] + ".html"
email = "elizabeth.aslinger@aya.yale.edu"
# set email to None to skip
# Set Data Sources & Species
species = "Mouse"
batches = ["OSD-613"]  # in list even if just 1, or list multiple to integrate

# Set Source Data Directory & Output Options
superdirec = "/home/easlinger/data"  # directory with original data
direcs = [os.path.join(superdirec, batch_id) for batch_id in batches]
# new h5ad will write to "data" sub-directory of where this notebook is
overwrite = True  # allow overwrite of files?
_batch_stem = "_".join(batches)  # shared prefix of the output file names
file_concat = os.path.join("data", _batch_stem + "_concatenated.h5ad")
file_new = os.path.join("data", _batch_stem + "_integrated.h5ad")

# Set Sample & Batch IDs, Plus Other Potential Sources of Batch Effects
col_group = "Group"  # age &/or space flight
col_age = "Characteristics[Age at Euthanasia]"
# col_age = "Factor Value[Age]"
col_condition = "Factor Value[Spaceflight]"
col_sample = "sample"
if len(batches) > 1:
    col_batch = "batch"
else:
    col_batch = col_group  # group=batch if 1 dataset
covariates_categorical = ["Comment[Euthanasia Date]"]
# covariates_categorical = None

# Do Sub-Clustering?
# subcluster_biggest = 1  # sub-cluster biggest cluster
# subcluster_biggest = 3  # sub-cluster biggest 3 clusters
subcluster_biggest = False  # no sub-clustering
kws_cluster = {}  # cluster individual samples
# kws_cluster = None  # do not cluster individual samples
# Set Annotation Sources
# Map My Cells atlas source: one entry per supported species, None otherwise
map_my_cells_source = {
    "Mouse": "WMB-10X",
    "Human": "WHB-10X",
}.get(species)
# map_my_cells_region_keys = None
map_my_cells_region_keys = [
    "RSP",
    "ACA",
    "PL-ILA-ORB",
    "AUD-TEa-PERI-ECT",
    "SS-GU-VISC",
    "MO-FRP",
    "AI",
    "VIS-PTLp",
    "VIS",
    "MOp",
    "AUD",
    "SSp",
    "TEa-PERI-ECT",
]  # regional subset for Map My Cells
map_my_cells_cell_keys = ["Isocortex"]  # pattern match: feature name column

# CellTypist model is only pre-set for mouse; anything else must be chosen
# by hand, so stop here rather than continue with a wrong model
if species != "Mouse":
    raise ValueError("Manually set CellTypist model for non-mouse!")
model_celltypist = "Mouse_Whole_Brain.pkl"
source_patterns = ["Brain", "Cortical", "cortex"]  # for ToppGene
# Make Pre-Defined Marker Dictionary
# Super-type => more specific sub-types
cts_superhierarchical = {
    "Neuron": ["Excitatory", "Inhibitory", "Glutamatergic", "GABAergic",
               "Dopaminergic", "Serotonergic", "Cholinergic"]
}  # if classified as Neuron + other, just keep more specific type(s)

# Combined labels => single display label. Both orderings of the pair are
# listed so the rename does not depend on which type comes first (the second
# key previously read "Inhibitory | Inhibitory", so the reversed ordering
# was never renamed)
rename_marker_based_annotation = {
    "Excitatory | Inhibitory": "Excitatory-Inhibitory",
    "Inhibitory | Excitatory": "Excitatory-Inhibitory"
}
def _tidy_cell_name(name):
    """Remove " cell" from a cell type name and capitalize each word."""
    words = re.sub(" cell", "", name).split(" ")
    return " ".join(word.capitalize() for word in words)


# Read the a priori marker table (expects "cellName" & "geneSymbol" columns)
mks_a_priori = pd.read_csv("GL-DPPD-7111_Mmus_Brain_CellType_GeneMarkers.csv")

# Abbreviate OPC first, tidy every name, then restore the acronym's case
# (capitalize() turns "OPC" into "Opc")
_cell_names = mks_a_priori.cellName.replace(
    {"Oligodendrocyte precursor cell": "OPC"})
_cell_names = _cell_names.apply(_tidy_cell_name).replace({"Opc": "OPC"})
mks_a_priori.loc[:, "cellName"] = _cell_names  # capitalize cell names

# Cell type => set of marker genes (comma-separated string split into a set)
_gene_strings = mks_a_priori.set_index("cellName")["geneSymbol"]
mks_a_priori = dict(_gene_strings.apply(lambda genes: set(genes.split(","))))

# Add/overwrite manually-curated marker sets
mks_a_priori.update({
    "Neuroepithelial": {"Nes", "Notch1", "Sox2", "Sox10", "Hes1", "Hes3"},
    "Excitatory": {
        "Slc17a7", "Slc30a3", "Tcf4",
        "Slc17a6", "Slc6a1", "Baiap3",
        "Grin1", "Grin2b", "Gls",
    },
    "Inhibitory": {
        "Gad1", "Slc6a1", "Gabbr1", "Gabbr2",
        "Gad2", "Slc32a1", "Oprm1", "Htr2c",
    },
    # "Glutamatergic": {"Slc17a7", "Slc17a6", "Grin1", "Grin2b", "Gls"},
    # "GABAergic": {"Slc6a1", "Gabbr1", "Gabbr2", "Gad2", "Gad1"},
    # "Dopaminergic": {"Th", "Dat", "Foxa2", "Girk2", "Nurr1", "Lmx1b"},
    # "Serotonergic": {"Tph", "Sert", "Pet1"},
    # "Cholinergic": {"ChAT", "VAChT", "Acetylcholinesterase"}
})
# Marker sets where each sub-type present also carries the markers of its
# super-type, and the super-type entries themselves are removed
markers_predefined = dict(mks_a_priori)
if cts_superhierarchical is not None:
    for parent, children in cts_superhierarchical.items():
        for child in children:
            if child not in markers_predefined:
                continue  # sub-type has no marker set; nothing to extend
            # union() builds a new set, so the sets shared with mks_a_priori
            # are left untouched
            markers_predefined[child] = markers_predefined[child].union(
                markers_predefined[parent])
    for parent in cts_superhierarchical:
        markers_predefined.pop(parent)

# Marker sets where excitatory & inhibitory markers are folded into "Neuron"
mks_collapsed = dict(mks_a_priori)
neuron_markers = mks_collapsed["Neuron"]
for subtype in ("Excitatory", "Inhibitory"):
    neuron_markers = neuron_markers.union(mks_collapsed[subtype])
mks_collapsed["Neuron"] = neuron_markers
for subtype in ("Excitatory", "Inhibitory"):
    mks_collapsed.pop(subtype)
Load Individual Sample Data¶
%%time
# Create a Subdirectory of Working Directory for Data Outputs
os.makedirs("data", exist_ok=True)

# Get Metadata
# One table per batch, read from
# <superdirec>/<batch>_metadata_<batch>-ISA/s_<batch>.txt (delimiter detected
# by the python engine), indexed by "Source Name" (index renamed col_sample)
metadata = [pd.read_csv(os.path.join(
    superdirec, f"{i}_metadata_{i}-ISA/s_{i}.txt"), sep=None,
    engine="python").set_index("Source Name").rename_axis(
        col_sample) for i in batches]  # list of metadata
for u, meta in enumerate(metadata):  # add a combined age & condition variable
    # Append age only if the age column exists & has more than one value
    # (decided once per table rather than once per row)
    add_age = col_age in meta.columns and len(meta[col_age].unique()) > 1
    group = meta.apply(lambda x: x[col_condition] + (
        " | " + str(x[col_age]) + " Weeks" if add_age else ""), axis=1)
    metadata[u] = meta.join(group.to_frame(
        col_group))  # add space flight (x age if applicable)

# Load Data
# Expected layout: <batch directory>/<download folder>/<sample folder>/
# filtered_feature_bc_matrix.h5, with the sample folder named by sample ID
adatas, files = {}, {}
for u, d in enumerate(direcs):  # iterate directories, then samples within
    for x in os.listdir(d):
        dir_download = os.path.join(d, x)
        if not os.path.isdir(dir_download):
            continue  # ignore loose files next to the download folders
        ddd = [os.path.join(dir_download, i) for i in os.listdir(dir_download)]
        if not ddd:  # fail with a clear message instead of an IndexError
            raise ValueError(f"No sample folder found in: {dir_download}")
        if len(ddd) > 1:  # ensure 1 subdirectory (for the sample) in folder
            raise ValueError(f"More than one file: {ddd}")
        # ddd[0] is already the full path; joining it to d/x a second time
        # only produced a valid path when superdirec was absolute
        dir_sample = ddd[0]
        sample = os.path.basename(dir_sample)  # sample ID
        files[sample] = os.path.join(
            dir_sample, "filtered_feature_bc_matrix.h5")  # file
        adata = sc.read_10x_h5(files[sample])  # read anndata (h5)
        adatas[sample] = adata
        if col_batch:  # if a batch column specified
            # If col_batch is also a metadata column (e.g., col_group when
            # there is only one dataset), the metadata loop below replaces
            # this value with the sample's own
            adata.obs.loc[:, col_batch] = batches[u]  # batch => .obs
        adata.obs.loc[:, col_sample] = sample  # sample ID => .obs
        # Look the sample up by index first, else by the "Sample Name" column
        samp_metadata = metadata[u].loc[sample] if (
            sample in metadata[u].index.values) else metadata[u].set_index(
                "Sample Name").loc[sample]  # extract sample-specific metadata
        for v in samp_metadata.index.values:  # loop metadata => .obs columns
            adata.obs.loc[:, v] = samp_metadata.loc[v]
        adata.obs.loc[:, f"n_cells_original_{col_sample}"] = adata.obs.shape[
            0]  # original number of cells
print(files)
metadata
{'RRRM2_BRN_GC_ISS-T_YNG_GY4': '/home/easlinger/data/OSD-613/RRRM2_BRN_GC_ISS-T_YNG_GY4-20250307T191036Z-001/RRRM2_BRN_GC_ISS-T_YNG_GY4/filtered_feature_bc_matrix.h5', 'RRRM2_BRN_GC_ISS-T_YNG_GY9': '/home/easlinger/data/OSD-613/RRRM2_BRN_GC_ISS-T_YNG_GY9-20250307T192252Z-001/RRRM2_BRN_GC_ISS-T_YNG_GY9/filtered_feature_bc_matrix.h5', 'RRRM2_BRN_GC_ISS-T_OLD_GO18': '/home/easlinger/data/OSD-613/RRRM2_BRN_GC_ISS-T_OLD_GO18-20250307T185055Z-001/RRRM2_BRN_GC_ISS-T_OLD_GO18/filtered_feature_bc_matrix.h5', 'RRRM2_BRN_FLT_ISS-T_OLD_FO20': '/home/easlinger/data/OSD-613/RRRM2_BRN_FLT_ISS-T_OLD_FO20-20250307T162503Z-001/RRRM2_BRN_FLT_ISS-T_OLD_FO20/filtered_feature_bc_matrix.h5', 'RRRM2_BRN_GC_ISS-T_OLD_GO19': '/home/easlinger/data/OSD-613/RRRM2_BRN_GC_ISS-T_OLD_GO19-20250307T185555Z-001/RRRM2_BRN_GC_ISS-T_OLD_GO19/filtered_feature_bc_matrix.h5', 'RRRM2_BRN_GC_ISS-T_OLD_GO13': '/home/easlinger/data/OSD-613/RRRM2_BRN_GC_ISS-T_OLD_GO13-20250307T164505Z-001/RRRM2_BRN_GC_ISS-T_OLD_GO13/filtered_feature_bc_matrix.h5', 'RRRM2_BRN_FLT_ISS-T_YNG_FY8': '/home/easlinger/data/OSD-613/RRRM2_BRN_FLT_ISS-T_YNG_FY8-20250307T163836Z-001/RRRM2_BRN_FLT_ISS-T_YNG_FY8/filtered_feature_bc_matrix.h5', 'RRRM2_BRN_FLT_ISS-T_YNG_FY7': '/home/easlinger/data/OSD-613/RRRM2_BRN_FLT_ISS-T_YNG_FY7-20250307T164138Z-001/RRRM2_BRN_FLT_ISS-T_YNG_FY7/filtered_feature_bc_matrix.h5', 'RRRM2_BRN_FLT_ISS-T_OLD_FO19': '/home/easlinger/data/OSD-613/RRRM2_BRN_FLT_ISS-T_OLD_FO19-20250307T161920Z-001/RRRM2_BRN_FLT_ISS-T_OLD_FO19/filtered_feature_bc_matrix.h5', 'RRRM2_BRN_GC_ISS-T_YNG_GY7': '/home/easlinger/data/OSD-613/RRRM2_BRN_GC_ISS-T_YNG_GY7-20250307T191343Z-001/RRRM2_BRN_GC_ISS-T_YNG_GY7/filtered_feature_bc_matrix.h5', 'RRRM2_BRN_FLT_ISS-T_OLD_FO14': '/home/easlinger/data/OSD-613/RRRM2_BRN_FLT_ISS-T_OLD_FO14-20250307T160756Z-001/RRRM2_BRN_FLT_ISS-T_OLD_FO14/filtered_feature_bc_matrix.h5', 'RRRM2_BRN_GC_ISS-T_YNG_GY1': 
'/home/easlinger/data/OSD-613/RRRM2_BRN_GC_ISS-T_YNG_GY1-20250307T190033Z-001/RRRM2_BRN_GC_ISS-T_YNG_GY1/filtered_feature_bc_matrix.h5', 'RRRM2_BRN_FLT_ISS-T_YNG_FY2': '/home/easlinger/data/OSD-613/RRRM2_BRN_FLT_ISS-T_YNG_FY2-20250307T162616Z-001/RRRM2_BRN_FLT_ISS-T_YNG_FY2/filtered_feature_bc_matrix.h5', 'RRRM2_BRN_FLT_ISS-T_OLD_FO17': '/home/easlinger/data/OSD-613/RRRM2_BRN_FLT_ISS-T_OLD_FO17-20250307T192814Z-001/RRRM2_BRN_FLT_ISS-T_OLD_FO17/filtered_feature_bc_matrix.h5', 'RRRM2_BRN_GC_ISS-T_OLD_GO16': '/home/easlinger/data/OSD-613/RRRM2_BRN_GC_ISS-T_OLD_GO16-20250307T184654Z-001/RRRM2_BRN_GC_ISS-T_OLD_GO16/filtered_feature_bc_matrix.h5', 'RRRM2_BRN_FLT_ISS-T_OLD_FO16': '/home/easlinger/data/OSD-613/RRRM2_BRN_FLT_ISS-T_OLD_FO16-20250307T161325Z-001/RRRM2_BRN_FLT_ISS-T_OLD_FO16/filtered_feature_bc_matrix.h5', 'RRRM2_BRN_GC_ISS-T_YNG_GY2': '/home/easlinger/data/OSD-613/RRRM2_BRN_GC_ISS-T_YNG_GY2-20250307T190542Z-001/RRRM2_BRN_GC_ISS-T_YNG_GY2/filtered_feature_bc_matrix.h5', 'RRRM2_BRN_FLT_ISS-T_YNG_FY5': '/home/easlinger/data/OSD-613/RRRM2_BRN_FLT_ISS-T_YNG_FY5-20250307T163216Z-001/RRRM2_BRN_FLT_ISS-T_YNG_FY5/filtered_feature_bc_matrix.h5'}
CPU times: user 16.2 s, sys: 1.66 s, total: 17.9 s
Wall time: 17.8 s
[ Sample Name Characteristics[Organism] Term Source REF Term Accession Number Characteristics[Strain] Term Source REF.1 \
sample
RRRM-2_FL_ISS_14 RRRM2_BRN_FLT_ISS-T_OLD_FO14 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... C57BL/6NTac EFO
RRRM-2_FL_ISS_16 RRRM2_BRN_FLT_ISS-T_OLD_FO16 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... C57BL/6NTac EFO
RRRM-2_FL_ISS_17 RRRM2_BRN_FLT_ISS-T_OLD_FO17 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... C57BL/6NTac EFO
RRRM-2_FL_ISS_19 RRRM2_BRN_FLT_ISS-T_OLD_FO19 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... C57BL/6NTac EFO
RRRM-2_FL_ISS_20 RRRM2_BRN_FLT_ISS-T_OLD_FO20 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... C57BL/6NTac EFO
RRRM-2_FL_ISS_02 RRRM2_BRN_FLT_ISS-T_YNG_FY2 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... C57BL/6NTac EFO
RRRM-2_FL_ISS_05 RRRM2_BRN_FLT_ISS-T_YNG_FY5 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... C57BL/6NTac EFO
RRRM-2_FL_ISS_07 RRRM2_BRN_FLT_ISS-T_YNG_FY7 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... C57BL/6NTac EFO
RRRM-2_FL_ISS_08 RRRM2_BRN_FLT_ISS-T_YNG_FY8 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... C57BL/6NTac EFO
RRRM-2_HGC_ISS_13 RRRM2_BRN_GC_ISS-T_OLD_GO13 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... C57BL/6NTac EFO
RRRM-2_HGC_ISS_16 RRRM2_BRN_GC_ISS-T_OLD_GO16 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... C57BL/6NTac EFO
RRRM-2_HGC_ISS_18 RRRM2_BRN_GC_ISS-T_OLD_GO18 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... C57BL/6NTac EFO
RRRM-2_HGC_ISS_19 RRRM2_BRN_GC_ISS-T_OLD_GO19 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... C57BL/6NTac EFO
RRRM-2_HGC_ISS_01 RRRM2_BRN_GC_ISS-T_YNG_GY1 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... C57BL/6NTac EFO
RRRM-2_HGC_ISS_02 RRRM2_BRN_GC_ISS-T_YNG_GY2 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... C57BL/6NTac EFO
RRRM-2_HGC_ISS_04 RRRM2_BRN_GC_ISS-T_YNG_GY4 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... C57BL/6NTac EFO
RRRM-2_HGC_ISS_07 RRRM2_BRN_GC_ISS-T_YNG_GY7 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... C57BL/6NTac EFO
RRRM-2_HGC_ISS_09 RRRM2_BRN_GC_ISS-T_YNG_GY9 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON... C57BL/6NTac EFO
Term Accession Number.1 Characteristics[Genotype] Term Source REF.2 Term Accession Number.2 Characteristics[Animal Source] Characteristics[Sex] \
sample
RRRM-2_FL_ISS_14 http://www.ebi.ac.uk/efo/EFO_0020093 Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Taconic Biosciences Female
RRRM-2_FL_ISS_16 http://www.ebi.ac.uk/efo/EFO_0020093 Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Taconic Biosciences Female
RRRM-2_FL_ISS_17 http://www.ebi.ac.uk/efo/EFO_0020093 Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Taconic Biosciences Female
RRRM-2_FL_ISS_19 http://www.ebi.ac.uk/efo/EFO_0020093 Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Taconic Biosciences Female
RRRM-2_FL_ISS_20 http://www.ebi.ac.uk/efo/EFO_0020093 Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Taconic Biosciences Female
RRRM-2_FL_ISS_02 http://www.ebi.ac.uk/efo/EFO_0020093 Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Taconic Biosciences Female
RRRM-2_FL_ISS_05 http://www.ebi.ac.uk/efo/EFO_0020093 Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Taconic Biosciences Female
RRRM-2_FL_ISS_07 http://www.ebi.ac.uk/efo/EFO_0020093 Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Taconic Biosciences Female
RRRM-2_FL_ISS_08 http://www.ebi.ac.uk/efo/EFO_0020093 Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Taconic Biosciences Female
RRRM-2_HGC_ISS_13 http://www.ebi.ac.uk/efo/EFO_0020093 Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Taconic Biosciences Female
RRRM-2_HGC_ISS_16 http://www.ebi.ac.uk/efo/EFO_0020093 Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Taconic Biosciences Female
RRRM-2_HGC_ISS_18 http://www.ebi.ac.uk/efo/EFO_0020093 Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Taconic Biosciences Female
RRRM-2_HGC_ISS_19 http://www.ebi.ac.uk/efo/EFO_0020093 Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Taconic Biosciences Female
RRRM-2_HGC_ISS_01 http://www.ebi.ac.uk/efo/EFO_0020093 Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Taconic Biosciences Female
RRRM-2_HGC_ISS_02 http://www.ebi.ac.uk/efo/EFO_0020093 Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Taconic Biosciences Female
RRRM-2_HGC_ISS_04 http://www.ebi.ac.uk/efo/EFO_0020093 Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Taconic Biosciences Female
RRRM-2_HGC_ISS_07 http://www.ebi.ac.uk/efo/EFO_0020093 Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Taconic Biosciences Female
RRRM-2_HGC_ISS_09 http://www.ebi.ac.uk/efo/EFO_0020093 Wild Type NCIT http://purl.obolibrary.org/obo/NCIT_C62195 Taconic Biosciences Female
Term Source REF.3 Term Accession Number.3 Factor Value[Spaceflight] Term Source REF.4 Term Accession Number.4 \
sample
RRRM-2_FL_ISS_14 MESH http://purl.bioontology.org/ontology/MESH/D005260 Space Flight MESH http://purl.bioontology.org/ontology/MESH/D013026
RRRM-2_FL_ISS_16 MESH http://purl.bioontology.org/ontology/MESH/D005260 Space Flight MESH http://purl.bioontology.org/ontology/MESH/D013026
RRRM-2_FL_ISS_17 MESH http://purl.bioontology.org/ontology/MESH/D005260 Space Flight MESH http://purl.bioontology.org/ontology/MESH/D013026
RRRM-2_FL_ISS_19 MESH http://purl.bioontology.org/ontology/MESH/D005260 Space Flight MESH http://purl.bioontology.org/ontology/MESH/D013026
RRRM-2_FL_ISS_20 MESH http://purl.bioontology.org/ontology/MESH/D005260 Space Flight MESH http://purl.bioontology.org/ontology/MESH/D013026
RRRM-2_FL_ISS_02 MESH http://purl.bioontology.org/ontology/MESH/D005260 Space Flight MESH http://purl.bioontology.org/ontology/MESH/D013026
RRRM-2_FL_ISS_05 MESH http://purl.bioontology.org/ontology/MESH/D005260 Space Flight MESH http://purl.bioontology.org/ontology/MESH/D013026
RRRM-2_FL_ISS_07 MESH http://purl.bioontology.org/ontology/MESH/D005260 Space Flight MESH http://purl.bioontology.org/ontology/MESH/D013026
RRRM-2_FL_ISS_08 MESH http://purl.bioontology.org/ontology/MESH/D005260 Space Flight MESH http://purl.bioontology.org/ontology/MESH/D013026
RRRM-2_HGC_ISS_13 MESH http://purl.bioontology.org/ontology/MESH/D005260 Ground Control OSD https://osdr.nasa.gov/
RRRM-2_HGC_ISS_16 MESH http://purl.bioontology.org/ontology/MESH/D005260 Ground Control OSD https://osdr.nasa.gov/
RRRM-2_HGC_ISS_18 MESH http://purl.bioontology.org/ontology/MESH/D005260 Ground Control OSD https://osdr.nasa.gov/
RRRM-2_HGC_ISS_19 MESH http://purl.bioontology.org/ontology/MESH/D005260 Ground Control OSD https://osdr.nasa.gov/
RRRM-2_HGC_ISS_01 MESH http://purl.bioontology.org/ontology/MESH/D005260 Ground Control OSD https://osdr.nasa.gov/
RRRM-2_HGC_ISS_02 MESH http://purl.bioontology.org/ontology/MESH/D005260 Ground Control OSD https://osdr.nasa.gov/
RRRM-2_HGC_ISS_04 MESH http://purl.bioontology.org/ontology/MESH/D005260 Ground Control OSD https://osdr.nasa.gov/
RRRM-2_HGC_ISS_07 MESH http://purl.bioontology.org/ontology/MESH/D005260 Ground Control OSD https://osdr.nasa.gov/
RRRM-2_HGC_ISS_09 MESH http://purl.bioontology.org/ontology/MESH/D005260 Ground Control OSD https://osdr.nasa.gov/
Factor Value[Age] Unit Term Source REF.5 Term Accession Number.5 Characteristics[Material Type] Term Source REF.6 Term Accession Number.6 \
sample
RRRM-2_FL_ISS_14 29 week UO http://purl.obolibrary.org/obo/UO_0000034 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819
RRRM-2_FL_ISS_16 29 week UO http://purl.obolibrary.org/obo/UO_0000034 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819
RRRM-2_FL_ISS_17 29 week UO http://purl.obolibrary.org/obo/UO_0000034 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819
RRRM-2_FL_ISS_19 29 week UO http://purl.obolibrary.org/obo/UO_0000034 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819
RRRM-2_FL_ISS_20 29 week UO http://purl.obolibrary.org/obo/UO_0000034 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819
RRRM-2_FL_ISS_02 12 week UO http://purl.obolibrary.org/obo/UO_0000034 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819
RRRM-2_FL_ISS_05 12 week UO http://purl.obolibrary.org/obo/UO_0000034 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819
RRRM-2_FL_ISS_07 12 week UO http://purl.obolibrary.org/obo/UO_0000034 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819
RRRM-2_FL_ISS_08 12 week UO http://purl.obolibrary.org/obo/UO_0000034 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819
RRRM-2_HGC_ISS_13 29 week UO http://purl.obolibrary.org/obo/UO_0000034 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819
RRRM-2_HGC_ISS_16 29 week UO http://purl.obolibrary.org/obo/UO_0000034 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819
RRRM-2_HGC_ISS_18 29 week UO http://purl.obolibrary.org/obo/UO_0000034 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819
RRRM-2_HGC_ISS_19 29 week UO http://purl.obolibrary.org/obo/UO_0000034 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819
RRRM-2_HGC_ISS_01 12 week UO http://purl.obolibrary.org/obo/UO_0000034 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819
RRRM-2_HGC_ISS_02 12 week UO http://purl.obolibrary.org/obo/UO_0000034 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819
RRRM-2_HGC_ISS_04 12 week UO http://purl.obolibrary.org/obo/UO_0000034 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819
RRRM-2_HGC_ISS_07 12 week UO http://purl.obolibrary.org/obo/UO_0000034 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819
RRRM-2_HGC_ISS_09 12 week UO http://purl.obolibrary.org/obo/UO_0000034 Left cerebral hemisphere FMA http://purl.org/sig/ont/fma/fma61819
Characteristics[diet] Characteristics[Feeding Schedule] Characteristics[Age at Euthanasia] Unit.1 Term Source REF.7 \
sample
RRRM-2_FL_ISS_14 Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum 37 week UO
RRRM-2_FL_ISS_16 Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum 37 week UO
RRRM-2_FL_ISS_17 Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum 37 week UO
RRRM-2_FL_ISS_19 Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum 37 week UO
RRRM-2_FL_ISS_20 Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum 37 week UO
RRRM-2_FL_ISS_02 Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum 20 week UO
RRRM-2_FL_ISS_05 Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum 20 week UO
RRRM-2_FL_ISS_07 Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum 20 week UO
RRRM-2_FL_ISS_08 Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum 20 week UO
RRRM-2_HGC_ISS_13 Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum 37 week UO
RRRM-2_HGC_ISS_16 Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum 37 week UO
RRRM-2_HGC_ISS_18 Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum 37 week UO
RRRM-2_HGC_ISS_19 Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum 37 week UO
RRRM-2_HGC_ISS_01 Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum 20 week UO
RRRM-2_HGC_ISS_02 Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum 20 week UO
RRRM-2_HGC_ISS_04 Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum 20 week UO
RRRM-2_HGC_ISS_07 Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum 20 week UO
RRRM-2_HGC_ISS_09 Nutrient Upgraded Rodent Food Bar (NuRFB) ad libitum 20 week UO
Term Accession Number.7 Protocol REF Parameter Value[habitat] Parameter Value[Enrichment material] Parameter Value[duration] \
sample
RRRM-2_FL_ISS_14 http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) Hut 53
RRRM-2_FL_ISS_16 http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) Hut 53
RRRM-2_FL_ISS_17 http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) Hut 53
RRRM-2_FL_ISS_19 http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) Hut 53
RRRM-2_FL_ISS_20 http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) Hut 53
RRRM-2_FL_ISS_02 http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) Hut 53
RRRM-2_FL_ISS_05 http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) Hut 53
RRRM-2_FL_ISS_07 http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) Hut 53
RRRM-2_FL_ISS_08 http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) Hut 53
RRRM-2_HGC_ISS_13 http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) Hut 53
RRRM-2_HGC_ISS_16 http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) Hut 53
RRRM-2_HGC_ISS_18 http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) Hut 53
RRRM-2_HGC_ISS_19 http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) Hut 53
RRRM-2_HGC_ISS_01 http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) Hut 53
RRRM-2_HGC_ISS_02 http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) Hut 53
RRRM-2_HGC_ISS_04 http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) Hut 53
RRRM-2_HGC_ISS_07 http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) Hut 53
RRRM-2_HGC_ISS_09 http://purl.obolibrary.org/obo/UO_0000034 Animal Husbandry Rodent Flight Hardware (Transporter and Habitat) Hut 53
Unit.2 Term Source REF.8 Term Accession Number.8 Parameter Value[light cycle] Protocol REF.1 Parameter Value[Euthanasia Method] \
sample
RRRM-2_FL_ISS_14 day UO http://purl.obolibrary.org/obo/UO_0000033 12 h light/dark cycle sample collection Bilateral thoracotomy with sedation, Cardiac p...
RRRM-2_FL_ISS_16 day UO http://purl.obolibrary.org/obo/UO_0000033 12 h light/dark cycle sample collection Bilateral thoracotomy with sedation, Cardiac p...
RRRM-2_FL_ISS_17 day UO http://purl.obolibrary.org/obo/UO_0000033 12 h light/dark cycle sample collection Bilateral thoracotomy with sedation, Cardiac p...
RRRM-2_FL_ISS_19 day UO http://purl.obolibrary.org/obo/UO_0000033 12 h light/dark cycle sample collection Bilateral thoracotomy with sedation, Cardiac p...
RRRM-2_FL_ISS_20 day UO http://purl.obolibrary.org/obo/UO_0000033 12 h light/dark cycle sample collection Bilateral thoracotomy with sedation, Cardiac p...
RRRM-2_FL_ISS_02 day UO http://purl.obolibrary.org/obo/UO_0000033 12 h light/dark cycle sample collection Bilateral thoracotomy with sedation, Cardiac p...
RRRM-2_FL_ISS_05 day UO http://purl.obolibrary.org/obo/UO_0000033 12 h light/dark cycle sample collection Bilateral thoracotomy with sedation, Cardiac p...
RRRM-2_FL_ISS_07 day UO http://purl.obolibrary.org/obo/UO_0000033 12 h light/dark cycle sample collection Bilateral thoracotomy with sedation, Cardiac p...
RRRM-2_FL_ISS_08 day UO http://purl.obolibrary.org/obo/UO_0000033 12 h light/dark cycle sample collection Bilateral thoracotomy with sedation, Cardiac p...
RRRM-2_HGC_ISS_13 day UO http://purl.obolibrary.org/obo/UO_0000033 12 h light/dark cycle sample collection Bilateral thoracotomy with sedation, Cardiac p...
RRRM-2_HGC_ISS_16 day UO http://purl.obolibrary.org/obo/UO_0000033 12 h light/dark cycle sample collection Bilateral thoracotomy with sedation, Cardiac p...
RRRM-2_HGC_ISS_18 day UO http://purl.obolibrary.org/obo/UO_0000033 12 h light/dark cycle sample collection Bilateral thoracotomy with sedation, Cardiac p...
RRRM-2_HGC_ISS_19 day UO http://purl.obolibrary.org/obo/UO_0000033 12 h light/dark cycle sample collection Bilateral thoracotomy with sedation, Cardiac p...
RRRM-2_HGC_ISS_01 day UO http://purl.obolibrary.org/obo/UO_0000033 12 h light/dark cycle sample collection Bilateral thoracotomy with sedation, Cardiac p...
RRRM-2_HGC_ISS_02 day UO http://purl.obolibrary.org/obo/UO_0000033 12 h light/dark cycle sample collection Bilateral thoracotomy with sedation, Cardiac p...
RRRM-2_HGC_ISS_04 day UO http://purl.obolibrary.org/obo/UO_0000033 12 h light/dark cycle sample collection Bilateral thoracotomy with sedation, Cardiac p...
RRRM-2_HGC_ISS_07 day UO http://purl.obolibrary.org/obo/UO_0000033 12 h light/dark cycle sample collection Bilateral thoracotomy with sedation, Cardiac p...
RRRM-2_HGC_ISS_09 day UO http://purl.obolibrary.org/obo/UO_0000033 12 h light/dark cycle sample collection Bilateral thoracotomy with sedation, Cardiac p...
Parameter Value[Sample Preservation Method] Term Source REF.9 Term Accession Number.9 Parameter Value[Sample Storage Temperature] Unit.3 \
sample
RRRM-2_FL_ISS_14 Cryochiller OSD https://osdr.nasa.gov/ -80 degree Celsius
RRRM-2_FL_ISS_16 Cryochiller OSD https://osdr.nasa.gov/ -80 degree Celsius
RRRM-2_FL_ISS_17 Cryochiller OSD https://osdr.nasa.gov/ -80 degree Celsius
RRRM-2_FL_ISS_19 Cryochiller OSD https://osdr.nasa.gov/ -80 degree Celsius
RRRM-2_FL_ISS_20 Cryochiller OSD https://osdr.nasa.gov/ -80 degree Celsius
RRRM-2_FL_ISS_02 Cryochiller OSD https://osdr.nasa.gov/ -80 degree Celsius
RRRM-2_FL_ISS_05 Cryochiller OSD https://osdr.nasa.gov/ -80 degree Celsius
RRRM-2_FL_ISS_07 Cryochiller OSD https://osdr.nasa.gov/ -80 degree Celsius
RRRM-2_FL_ISS_08 Cryochiller OSD https://osdr.nasa.gov/ -80 degree Celsius
RRRM-2_HGC_ISS_13 Liquid Nitrogen NCIT http://purl.obolibrary.org/obo/NCIT_C68796 -80 degree Celsius
RRRM-2_HGC_ISS_16 Liquid Nitrogen NCIT http://purl.obolibrary.org/obo/NCIT_C68796 -80 degree Celsius
RRRM-2_HGC_ISS_18 Liquid Nitrogen NCIT http://purl.obolibrary.org/obo/NCIT_C68796 -80 degree Celsius
RRRM-2_HGC_ISS_19 Liquid Nitrogen NCIT http://purl.obolibrary.org/obo/NCIT_C68796 -80 degree Celsius
RRRM-2_HGC_ISS_01 Liquid Nitrogen NCIT http://purl.obolibrary.org/obo/NCIT_C68796 -80 degree Celsius
RRRM-2_HGC_ISS_02 Liquid Nitrogen NCIT http://purl.obolibrary.org/obo/NCIT_C68796 -80 degree Celsius
RRRM-2_HGC_ISS_04 Liquid Nitrogen NCIT http://purl.obolibrary.org/obo/NCIT_C68796 -80 degree Celsius
RRRM-2_HGC_ISS_07 Liquid Nitrogen NCIT http://purl.obolibrary.org/obo/NCIT_C68796 -80 degree Celsius
RRRM-2_HGC_ISS_09 Liquid Nitrogen NCIT http://purl.obolibrary.org/obo/NCIT_C68796 -80 degree Celsius
Term Source REF.10 Term Accession Number.10 Comment[RFID] Comment[Euthanasia Date] Group
sample
RRRM-2_FL_ISS_14 UO http://purl.obolibrary.org/obo/UO_0000027 6E2A180C12 18-Sep-2019 Space Flight | 37 Weeks
RRRM-2_FL_ISS_16 UO http://purl.obolibrary.org/obo/UO_0000027 6E27313758 19-Sep-2019 Space Flight | 37 Weeks
RRRM-2_FL_ISS_17 UO http://purl.obolibrary.org/obo/UO_0000027 6E353B735A 19-Sep-2019 Space Flight | 37 Weeks
RRRM-2_FL_ISS_19 UO http://purl.obolibrary.org/obo/UO_0000027 6E3D2C2D2C 19-Sep-2019 Space Flight | 37 Weeks
RRRM-2_FL_ISS_20 UO http://purl.obolibrary.org/obo/UO_0000027 6E272D285A 19-Sep-2019 Space Flight | 37 Weeks
RRRM-2_FL_ISS_02 UO http://purl.obolibrary.org/obo/UO_0000027 6E394B6C23 16-Sep-2019 Space Flight | 20 Weeks
RRRM-2_FL_ISS_05 UO http://purl.obolibrary.org/obo/UO_0000027 6E3E102A12 16-Sep-2019 Space Flight | 20 Weeks
RRRM-2_FL_ISS_07 UO http://purl.obolibrary.org/obo/UO_0000027 6E3E325E7C 17-Sep-2019 Space Flight | 20 Weeks
RRRM-2_FL_ISS_08 UO http://purl.obolibrary.org/obo/UO_0000027 6E3C42091B 17-Sep-2019 Space Flight | 20 Weeks
RRRM-2_HGC_ISS_13 UO http://purl.obolibrary.org/obo/UO_0000027 6E3A3C1239 19-Sep-2019 Ground Control | 37 Weeks
RRRM-2_HGC_ISS_16 UO http://purl.obolibrary.org/obo/UO_0000027 6E35413D61 20-Sep-2019 Ground Control | 37 Weeks
RRRM-2_HGC_ISS_18 UO http://purl.obolibrary.org/obo/UO_0000027 6E3C216122 20-Sep-2019 Ground Control | 37 Weeks
RRRM-2_HGC_ISS_19 UO http://purl.obolibrary.org/obo/UO_0000027 6E3C342F47 20-Sep-2019 Ground Control | 37 Weeks
RRRM-2_HGC_ISS_01 UO http://purl.obolibrary.org/obo/UO_0000027 6E28307F36 18-Sep-2019 Ground Control | 20 Weeks
RRRM-2_HGC_ISS_02 UO http://purl.obolibrary.org/obo/UO_0000027 6E371E2032 18-Sep-2019 Ground Control | 20 Weeks
RRRM-2_HGC_ISS_04 UO http://purl.obolibrary.org/obo/UO_0000027 6E2A671967 18-Sep-2019 Ground Control | 20 Weeks
RRRM-2_HGC_ISS_07 UO http://purl.obolibrary.org/obo/UO_0000027 6E28536840 18-Sep-2019 Ground Control | 20 Weeks
RRRM-2_HGC_ISS_09 UO http://purl.obolibrary.org/obo/UO_0000027 6E3C705065 19-Sep-2019 Ground Control | 20 Weeks ]
QC¶
Perform Sample-Specific QC¶
%matplotlib inline
plot_qc = False  # change to True to get sample-level QC plots (a bit slow)

# Run QC separately on every sample; the call returns the QC metrics, the
# per-gene cell counts, descriptive statistics (including the percentiles
# requested below), and any figures
qcs, n_cells_by_counts, descriptives, figs = scflow.pp.perform_qc_multi(
    adatas, col_batch=col_batch, col_sample=col_sample, plot=plot_qc,
    percentiles=[0.025, 0.10, 0.25, 0.50, 0.75, 0.85, 0.90, 0.975],
    figsize=(10, 10))  # perform QC on individual samples

# Percentile columns of `descriptives` (labels containing "%"); this list
# does not depend on the metric, so build it once outside the loop
pct_cols = [i for i in descriptives if ("%" in i)]

for x in qcs:  # iterate QC metrics & plot percentiles by group
    # Distribution of the metric, split by batch/group
    fig = sns.catplot(qcs, y=x, hue=col_batch, kind="violin")
    fig.figure.suptitle(x)  # `.figure` replaces the deprecated `.fig` alias
    # Percentile values of the same metric as grouped bars
    fig = sns.catplot(
        descriptives.loc[:, :, x][pct_cols].stack().to_frame("Value"),
        x="Metric", y="Value", kind="bar",
        hue=col_batch, height=10)
    fig.figure.suptitle(x)

# Final expression of the cell: rounded descriptives with one column per
# QC variable (displayed as the cell output)
descriptives.stack().unstack("Variable").round()
| Variable | n_cells_by_counts | n_genes_by_counts | pct_counts_mt | total_counts | ||
|---|---|---|---|---|---|---|
| sample | Group | Metric | ||||
| RRRM2_BRN_FLT_ISS-T_OLD_FO14 | Space Flight | 37 Weeks | count | 56748.0 | 4260.0 | 4260.0 | 4260.0 |
| mean | 228.0 | 3040.0 | 2.0 | 10679.0 | ||
| std | 548.0 | 1730.0 | 3.0 | 9449.0 | ||
| min | 0.0 | 5.0 | 0.0 | 5.0 | ||
| 2.5% | 0.0 | 719.0 | 0.0 | 1129.0 | ||
| 10% | 0.0 | 931.0 | 0.0 | 1641.0 | ||
| 25% | 0.0 | 1271.0 | 0.0 | 2478.0 | ||
| 50% | 3.0 | 3182.0 | 1.0 | 8793.0 | ||
| 75% | 109.0 | 4415.0 | 2.0 | 15769.0 | ||
| 85% | 440.0 | 4972.0 | 3.0 | 20076.0 | ||
| 90% | 802.0 | 5327.0 | 4.0 | 23291.0 | ||
| 97.5% | 2114.0 | 6255.0 | 10.0 | 33911.0 | ||
| max | 4252.0 | 9857.0 | 46.0 | 69813.0 | ||
| RRRM2_BRN_FLT_ISS-T_OLD_FO16 | Space Flight | 37 Weeks | count | 56748.0 | 4863.0 | 4863.0 | 4863.0 |
| mean | 288.0 | 3358.0 | 4.0 | 11512.0 | ||
| std | 671.0 | 1890.0 | 6.0 | 10039.0 | ||
| min | 0.0 | 71.0 | 0.0 | 139.0 | ||
| 2.5% | 0.0 | 766.0 | 0.0 | 1213.0 | ||
| 10% | 0.0 | 983.0 | 0.0 | 1710.0 | ||
| 25% | 0.0 | 1396.0 | 1.0 | 2727.0 | ||
| 50% | 3.0 | 3551.0 | 3.0 | 9776.0 | ||
| 75% | 130.0 | 4796.0 | 6.0 | 16856.0 | ||
| 85% | 601.0 | 5431.0 | 8.0 | 21349.0 | ||
| 90% | 1077.0 | 5831.0 | 10.0 | 24861.0 | ||
| 97.5% | 2573.0 | 6861.0 | 20.0 | 36130.0 | ||
| max | 4861.0 | 9750.0 | 66.0 | 97274.0 | ||
| RRRM2_BRN_FLT_ISS-T_OLD_FO17 | Space Flight | 37 Weeks | count | 56748.0 | 1493.0 | 1493.0 | 1493.0 |
| mean | 37.0 | 1423.0 | 4.0 | 3043.0 | ||
| std | 106.0 | 1042.0 | 4.0 | 3380.0 | ||
| min | 0.0 | 12.0 | 0.0 | 16.0 | ||
| 2.5% | 0.0 | 316.0 | 0.0 | 376.0 | ||
| 10% | 0.0 | 418.0 | 0.0 | 546.0 | ||
| 25% | 0.0 | 575.0 | 1.0 | 752.0 | ||
| 50% | 0.0 | 990.0 | 4.0 | 1382.0 | ||
| 75% | 13.0 | 2215.0 | 6.0 | 4899.0 | ||
| 85% | 58.0 | 2678.0 | 6.0 | 6604.0 | ||
| 90% | 111.0 | 2918.0 | 7.0 | 7543.0 | ||
| 97.5% | 367.0 | 3803.0 | 12.0 | 11767.0 | ||
| max | 1490.0 | 6533.0 | 49.0 | 31615.0 | ||
| RRRM2_BRN_FLT_ISS-T_OLD_FO19 | Space Flight | 37 Weeks | count | 56748.0 | 7265.0 | 7265.0 | 7265.0 |
| mean | 224.0 | 1747.0 | 5.0 | 4006.0 | ||
| std | 595.0 | 1337.0 | 6.0 | 4217.0 | ||
| min | 0.0 | 2.0 | 0.0 | 2.0 | ||
| 2.5% | 0.0 | 194.0 | 0.0 | 257.0 | ||
| 10% | 0.0 | 376.0 | 0.0 | 491.0 | ||
| 25% | 0.0 | 642.0 | 1.0 | 909.0 | ||
| 50% | 2.0 | 1183.0 | 3.0 | 1901.0 | ||
| 75% | 83.0 | 2873.0 | 7.0 | 6573.0 | ||
| 85% | 390.0 | 3339.0 | 10.0 | 8421.0 | ||
| 90% | 723.0 | 3656.0 | 13.0 | 9694.0 | ||
| 97.5% | 2138.0 | 4636.0 | 21.0 | 14725.0 | ||
| max | 7235.0 | 6922.0 | 43.0 | 32072.0 | ||
| RRRM2_BRN_FLT_ISS-T_OLD_FO20 | Space Flight | 37 Weeks | count | 56748.0 | 5641.0 | 5641.0 | 5641.0 |
| mean | 256.0 | 2575.0 | 3.0 | 6564.0 | ||
| std | 629.0 | 1523.0 | 3.0 | 5498.0 | ||
| min | 0.0 | 3.0 | 0.0 | 4.0 | ||
| 2.5% | 0.0 | 628.0 | 0.0 | 895.0 | ||
| 10% | 0.0 | 795.0 | 0.0 | 1221.0 | ||
| 25% | 0.0 | 1048.0 | 1.0 | 1722.0 | ||
| 50% | 2.0 | 2730.0 | 2.0 | 5725.0 | ||
| 75% | 104.0 | 3763.0 | 4.0 | 9754.0 | ||
| 85% | 498.0 | 4231.0 | 6.0 | 11979.0 | ||
| 90% | 907.0 | 4551.0 | 8.0 | 13725.0 | ||
| 97.5% | 2384.0 | 5545.0 | 13.0 | 19711.0 | ||
| max | 5629.0 | 7819.0 | 29.0 | 38447.0 | ||
| RRRM2_BRN_FLT_ISS-T_YNG_FY2 | Space Flight | 20 Weeks | count | 56748.0 | 4125.0 | 4125.0 | 4125.0 |
| mean | 237.0 | 3261.0 | 2.0 | 11070.0 | ||
| std | 552.0 | 1915.0 | 3.0 | 9862.0 | ||
| min | 0.0 | 5.0 | 0.0 | 5.0 | ||
| 2.5% | 0.0 | 700.0 | 0.0 | 1087.0 | ||
| 10% | 0.0 | 971.0 | 0.0 | 1667.0 | ||
| 25% | 0.0 | 1326.0 | 1.0 | 2493.0 | ||
| 50% | 3.0 | 3534.0 | 1.0 | 9574.0 | ||
| 75% | 113.0 | 4776.0 | 3.0 | 16538.0 | ||
| 85% | 489.0 | 5352.0 | 4.0 | 20849.0 | ||
| 90% | 877.0 | 5761.0 | 5.0 | 24583.0 | ||
| 97.5% | 2119.0 | 6848.0 | 11.0 | 35338.0 | ||
| max | 4117.0 | 9338.0 | 48.0 | 64091.0 | ||
| RRRM2_BRN_FLT_ISS-T_YNG_FY5 | Space Flight | 20 Weeks | count | 56748.0 | 4714.0 | 4714.0 | 4714.0 |
| mean | 279.0 | 3358.0 | 4.0 | 10940.0 | ||
| std | 656.0 | 1779.0 | 5.0 | 9018.0 | ||
| min | 0.0 | 157.0 | 0.0 | 199.0 | ||
| 2.5% | 0.0 | 810.0 | 0.0 | 1300.0 | ||
| 10% | 0.0 | 1024.0 | 1.0 | 1811.0 | ||
| 25% | 0.0 | 1474.0 | 1.0 | 2978.0 | ||
| 50% | 3.0 | 3726.0 | 2.0 | 10188.0 | ||
| 75% | 127.0 | 4678.0 | 5.0 | 15560.0 | ||
| 85% | 566.0 | 5177.0 | 7.0 | 18941.0 | ||
| 90% | 1026.0 | 5521.0 | 10.0 | 21725.0 | ||
| 97.5% | 2544.0 | 6603.0 | 20.0 | 32232.0 | ||
| max | 4714.0 | 11910.0 | 42.0 | 117852.0 | ||
| RRRM2_BRN_FLT_ISS-T_YNG_FY7 | Space Flight | 20 Weeks | count | 56748.0 | 4238.0 | 4238.0 | 4238.0 |
| mean | 128.0 | 1718.0 | 3.0 | 4000.0 | ||
| std | 346.0 | 1204.0 | 3.0 | 3795.0 | ||
| min | 0.0 | 2.0 | 0.0 | 2.0 | ||
| 2.5% | 0.0 | 332.0 | 0.0 | 453.0 | ||
| 10% | 0.0 | 427.0 | 0.0 | 612.0 | ||
| 25% | 0.0 | 570.0 | 1.0 | 864.0 | ||
| 50% | 1.0 | 1644.0 | 1.0 | 3057.0 | ||
| 75% | 49.0 | 2567.0 | 3.0 | 5973.0 | ||
| 85% | 211.0 | 3028.0 | 5.0 | 7670.0 | ||
| 90% | 398.0 | 3371.0 | 6.0 | 9119.0 | ||
| 97.5% | 1255.0 | 4271.0 | 11.0 | 13453.0 | ||
| max | 4227.0 | 6450.0 | 56.0 | 31424.0 | ||
| RRRM2_BRN_FLT_ISS-T_YNG_FY8 | Space Flight | 20 Weeks | count | 56748.0 | 5658.0 | 5658.0 | 5658.0 |
| mean | 299.0 | 3002.0 | 2.0 | 8364.0 | ||
| std | 698.0 | 1854.0 | 3.0 | 7866.0 | ||
| min | 0.0 | 2.0 | 0.0 | 2.0 | ||
| 2.5% | 0.0 | 481.0 | 0.0 | 682.0 | ||
| 10% | 0.0 | 904.0 | 0.0 | 1381.0 | ||
| 25% | 0.0 | 1177.0 | 1.0 | 1971.0 | ||
| 50% | 3.0 | 3180.0 | 1.0 | 6891.0 | ||
| 75% | 137.0 | 4413.0 | 3.0 | 12232.0 | ||
| 85% | 635.0 | 5013.0 | 4.0 | 15381.0 | ||
| 90% | 1121.0 | 5394.0 | 5.0 | 17975.0 | ||
| 97.5% | 2639.0 | 6617.0 | 11.0 | 27907.0 | ||
| max | 5639.0 | 10018.0 | 47.0 | 81281.0 | ||
| RRRM2_BRN_GC_ISS-T_OLD_GO13 | Ground Control | 37 Weeks | count | 56748.0 | 3880.0 | 3880.0 | 3880.0 |
| mean | 190.0 | 2777.0 | 0.0 | 7470.0 | ||
| std | 462.0 | 1513.0 | 1.0 | 6139.0 | ||
| min | 0.0 | 49.0 | 0.0 | 56.0 | ||
| 2.5% | 0.0 | 823.0 | 0.0 | 1283.0 | ||
| 10% | 0.0 | 1035.0 | 0.0 | 1752.0 | ||
| 25% | 0.0 | 1308.0 | 0.0 | 2338.0 | ||
| 50% | 1.0 | 2736.0 | 0.0 | 5855.0 | ||
| 75% | 81.0 | 4017.0 | 0.0 | 10922.0 | ||
| 85% | 371.0 | 4484.0 | 1.0 | 13579.0 | ||
| 90% | 681.0 | 4839.0 | 1.0 | 15959.0 | ||
| 97.5% | 1735.0 | 5684.0 | 2.0 | 23157.0 | ||
| max | 3879.0 | 8613.0 | 17.0 | 47275.0 | ||
| RRRM2_BRN_GC_ISS-T_OLD_GO16 | Ground Control | 37 Weeks | count | 56748.0 | 20000.0 | 20000.0 | 20000.0 |
| mean | 374.0 | 1062.0 | 2.0 | 2148.0 | ||
| std | 1228.0 | 885.0 | 2.0 | 2710.0 | ||
| min | 0.0 | 2.0 | 0.0 | 2.0 | ||
| 2.5% | 0.0 | 201.0 | 0.0 | 235.0 | ||
| 10% | 0.0 | 327.0 | 0.0 | 403.0 | ||
| 25% | 0.0 | 473.0 | 1.0 | 626.0 | ||
| 50% | 3.0 | 727.0 | 1.0 | 1045.0 | ||
| 75% | 114.0 | 1283.0 | 2.0 | 2176.0 | ||
| 85% | 492.0 | 2102.0 | 3.0 | 4598.0 | ||
| 90% | 954.0 | 2487.0 | 4.0 | 5938.0 | ||
| 97.5% | 3744.0 | 3398.0 | 6.0 | 10018.0 | ||
| max | 19922.0 | 6446.0 | 46.0 | 29528.0 | ||
| RRRM2_BRN_GC_ISS-T_OLD_GO18 | Ground Control | 37 Weeks | count | 56748.0 | 6503.0 | 6503.0 | 6503.0 |
| mean | 207.0 | 1804.0 | 1.0 | 4486.0 | ||
| std | 597.0 | 1025.0 | 2.0 | 3585.0 | ||
| min | 0.0 | 6.0 | 0.0 | 6.0 | ||
| 2.5% | 0.0 | 326.0 | 0.0 | 422.0 | ||
| 10% | 0.0 | 499.0 | 0.0 | 724.0 | ||
| 25% | 0.0 | 852.0 | 0.0 | 1406.0 | ||
| 50% | 2.0 | 1859.0 | 0.0 | 3932.0 | ||
| 75% | 70.0 | 2525.0 | 1.0 | 6535.0 | ||
| 85% | 294.0 | 2865.0 | 1.0 | 7913.0 | ||
| 90% | 575.0 | 3103.0 | 2.0 | 9028.0 | ||
| 97.5% | 2165.0 | 3933.0 | 4.0 | 13468.0 | ||
| max | 6487.0 | 5740.0 | 41.0 | 24455.0 | ||
| RRRM2_BRN_GC_ISS-T_OLD_GO19 | Ground Control | 37 Weeks | count | 56748.0 | 8476.0 | 8476.0 | 8476.0 |
| mean | 427.0 | 2859.0 | 1.0 | 7321.0 | ||
| std | 1039.0 | 1505.0 | 2.0 | 5759.0 | ||
| min | 0.0 | 2.0 | 0.0 | 2.0 | ||
| 2.5% | 0.0 | 666.0 | 0.0 | 942.0 | ||
| 10% | 0.0 | 846.0 | 0.0 | 1274.0 | ||
| 25% | 0.0 | 1285.0 | 0.0 | 2102.0 | ||
| 50% | 3.0 | 3122.0 | 1.0 | 6965.0 | ||
| 75% | 177.0 | 3886.0 | 1.0 | 10141.0 | ||
| 85% | 843.0 | 4325.0 | 2.0 | 12228.0 | ||
| 90% | 1533.0 | 4691.0 | 3.0 | 14150.0 | ||
| 97.5% | 3946.0 | 5748.0 | 6.0 | 21190.0 | ||
| max | 8456.0 | 11828.0 | 37.0 | 89715.0 | ||
| RRRM2_BRN_GC_ISS-T_YNG_GY1 | Ground Control | 20 Weeks | count | 56748.0 | 3968.0 | 3968.0 | 3968.0 |
| mean | 246.0 | 3514.0 | 1.0 | 12739.0 | ||
| std | 573.0 | 1800.0 | 1.0 | 10626.0 | ||
| min | 0.0 | 13.0 | 0.0 | 16.0 | ||
| 2.5% | 0.0 | 923.0 | 0.0 | 1490.0 | ||
| 10% | 0.0 | 1189.0 | 0.0 | 2164.0 | ||
| 25% | 0.0 | 1674.0 | 0.0 | 3348.0 | ||
| 50% | 3.0 | 3762.0 | 0.0 | 11094.0 | ||
| 75% | 126.0 | 4900.0 | 1.0 | 18537.0 | ||
| 85% | 498.0 | 5415.0 | 1.0 | 23128.0 | ||
| 90% | 894.0 | 5754.0 | 2.0 | 26395.0 | ||
| 97.5% | 2231.0 | 6747.0 | 4.0 | 37457.0 | ||
| max | 3960.0 | 10980.0 | 27.0 | 90669.0 | ||
| RRRM2_BRN_GC_ISS-T_YNG_GY2 | Ground Control | 20 Weeks | count | 56748.0 | 10779.0 | 10779.0 | 10779.0 |
| mean | 479.0 | 2524.0 | 1.0 | 7477.0 | ||
| std | 1214.0 | 1550.0 | 2.0 | 7294.0 | ||
| min | 0.0 | 6.0 | 0.0 | 6.0 | ||
| 2.5% | 0.0 | 507.0 | 0.0 | 696.0 | ||
| 10% | 0.0 | 703.0 | 0.0 | 1065.0 | ||
| 25% | 0.0 | 974.0 | 0.0 | 1611.0 | ||
| 50% | 4.0 | 2605.0 | 0.0 | 6040.0 | ||
| 75% | 203.0 | 3744.0 | 1.0 | 11368.0 | ||
| 85% | 850.0 | 4232.0 | 2.0 | 14304.0 | ||
| 90% | 1585.0 | 4526.0 | 2.0 | 16462.0 | ||
| 97.5% | 4700.0 | 5465.0 | 5.0 | 23663.0 | ||
| max | 10758.0 | 14427.0 | 39.0 | 279678.0 | ||
| RRRM2_BRN_GC_ISS-T_YNG_GY4 | Ground Control | 20 Weeks | count | 56748.0 | 8542.0 | 8542.0 | 8542.0 |
| mean | 241.0 | 1602.0 | 1.0 | 3563.0 | ||
| std | 687.0 | 970.0 | 2.0 | 3047.0 | ||
| min | 0.0 | 3.0 | 0.0 | 3.0 | ||
| 2.5% | 0.0 | 321.0 | 0.0 | 414.0 | ||
| 10% | 0.0 | 472.0 | 0.0 | 662.0 | ||
| 25% | 0.0 | 726.0 | 0.0 | 1099.0 | ||
| 50% | 2.0 | 1492.0 | 1.0 | 2706.0 | ||
| 75% | 88.0 | 2303.0 | 1.0 | 5259.0 | ||
| 85% | 363.0 | 2624.0 | 2.0 | 6457.0 | ||
| 90% | 693.0 | 2882.0 | 3.0 | 7411.0 | ||
| 97.5% | 2448.0 | 3657.0 | 6.0 | 11122.0 | ||
| max | 8521.0 | 8455.0 | 26.0 | 40300.0 | ||
| RRRM2_BRN_GC_ISS-T_YNG_GY7 | Ground Control | 20 Weeks | count | 56748.0 | 5776.0 | 5776.0 | 5776.0 |
| mean | 304.0 | 2989.0 | 0.0 | 8828.0 | ||
| std | 730.0 | 1660.0 | 1.0 | 7667.0 | ||
| min | 0.0 | 8.0 | 0.0 | 8.0 | ||
| 2.5% | 0.0 | 799.0 | 0.0 | 1176.0 | ||
| 10% | 0.0 | 998.0 | 0.0 | 1596.0 | ||
| 25% | 0.0 | 1315.0 | 0.0 | 2276.0 | ||
| 50% | 3.0 | 3176.0 | 0.0 | 7404.0 | ||
| 75% | 136.0 | 4256.0 | 0.0 | 12784.0 | ||
| 85% | 606.0 | 4764.0 | 1.0 | 16101.0 | ||
| 90% | 1099.0 | 5158.0 | 1.0 | 18690.0 | ||
| 97.5% | 2806.0 | 6192.0 | 3.0 | 28050.0 | ||
| max | 5768.0 | 9784.0 | 30.0 | 61453.0 | ||
| RRRM2_BRN_GC_ISS-T_YNG_GY9 | Ground Control | 20 Weeks | count | 56748.0 | 6044.0 | 6044.0 | 6044.0 |
| mean | 310.0 | 2914.0 | 1.0 | 7814.0 | ||
| std | 753.0 | 1538.0 | 2.0 | 6297.0 | ||
| min | 0.0 | 6.0 | 0.0 | 6.0 | ||
| 2.5% | 0.0 | 703.0 | 0.0 | 1006.0 | ||
| 10% | 0.0 | 910.0 | 0.0 | 1404.0 | ||
| 25% | 0.0 | 1368.0 | 0.0 | 2293.0 | ||
| 50% | 3.0 | 3133.0 | 1.0 | 7126.0 | ||
| 75% | 132.0 | 3980.0 | 1.0 | 10850.0 | ||
| 85% | 609.0 | 4465.0 | 2.0 | 13366.0 | ||
| 90% | 1111.0 | 4814.0 | 2.0 | 15532.0 | ||
| 97.5% | 2884.0 | 5959.0 | 5.0 | 23784.0 | ||
| max | 6028.0 | 10304.0 | 36.0 | 73153.0 |
Auto-Detect Filtering Thresholds¶
Use the sample-specific 2.5th percentile as the minimum genes per cell, and the sample-specific 2.5th and 97.5th percentiles as the minimum and maximum total counts per cell (each minimum is subject to the specified absolute minimum). Use a fixed absolute maximum (`abs_max_mt`) as the upper bound for percent mitochondrial counts.
Use an absolute minimum cells per gene.
Also include arguments to run a PCA on individual samples before integrating.
# Options
# One [2.5th percentile, 97.5th percentile] list per row of `descriptives`,
# spread into one column per QC variable
bounds = descriptives[["2.5%", "97.5%"]].apply(
    lambda x: list(x), axis=1).unstack("Variable")
abs_min_cells = 20  # floor on cells per gene, whatever the percentile says
abs_min_genes = 200  # floor on genes per cell, whatever the percentile says
abs_min_count = 300  # floor on counts per cell, whatever the percentile says
abs_max_mt = 10  # ceiling on percent mitochondrial counts per cell
n_top_genes = 2000  # number of top genes to count as HVGs


def _percentile_pair(row, variable):
    """Return the ``[2.5%, 97.5%]`` list stored under ``variable``.

    ``row[variable]`` is either that list itself or a pandas object whose
    first element is the list; both cases are handled.
    """
    entry = row[variable]
    if isinstance(entry, list):
        return entry
    return entry.iloc[0]


# Set Thresholds
kws_pp = {}
for x in adatas:
    b_x = bounds.loc[x]
    # Counts: percentile-based window, lower edge clamped to the floor
    b_counts = _percentile_pair(b_x, "total_counts")
    b_counts = [max(b_counts[0], abs_min_count), b_counts[1]]
    # Genes: percentile-based minimum clamped to the floor; no maximum
    min_genes = max(
        _percentile_pair(b_x, "n_genes_by_counts")[0], abs_min_genes)
    kws_pp[x] = {
        "min_max_genes": [min_genes, None],
        "min_max_cells": [abs_min_cells, None],
        # Disabled alternative: percentile-based minimum cells per gene
        # "min_max_cells": [max(_percentile_pair(
        #     b_x, "n_cells_by_counts")[0], abs_min_cells), None],
        # Disabled alternative: cap MT% at the 97.5th percentile as well
        # "max_mt": min(abs_max_mt, _percentile_pair(
        #     b_x, "pct_counts_mt")[1]),
        "max_mt": abs_max_mt,
        "min_max_counts": b_counts,
        # "vars_regress_out": ["total_counts"],
        "target_sum": 1e6,
        "zero_center": True, "max_value": 10,  # scaling
        "n_top_genes": n_top_genes,
        "doublet_detection": "drop",
    }

# Show the settings per sample, first as text and then as a table
print("\n".join(f"{s}: {kw}" for s, kw in kws_pp.items()))
pd.DataFrame(kws_pp).T
RRRM2_BRN_GC_ISS-T_YNG_GY4: {'min_max_genes': [321.0, None], 'min_max_cells': [20, None], 'max_mt': 10, 'min_max_counts': [414.0, 11122.175000000005], 'target_sum': 1000000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RRRM2_BRN_GC_ISS-T_YNG_GY9: {'min_max_genes': [703.0, None], 'min_max_cells': [20, None], 'max_mt': 10, 'min_max_counts': [1006.1500000000001, 23784.40000000001], 'target_sum': 1000000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RRRM2_BRN_GC_ISS-T_OLD_GO18: {'min_max_genes': [326.1, None], 'min_max_cells': [20, None], 'max_mt': 10, 'min_max_counts': [422.1, 13468.399999999998], 'target_sum': 1000000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RRRM2_BRN_FLT_ISS-T_OLD_FO20: {'min_max_genes': [628.0, None], 'min_max_cells': [20, None], 'max_mt': 10, 'min_max_counts': [895.0, 19711.0], 'target_sum': 1000000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RRRM2_BRN_GC_ISS-T_OLD_GO19: {'min_max_genes': [666.0, None], 'min_max_cells': [20, None], 'max_mt': 10, 'min_max_counts': [942.0, 21190.0], 'target_sum': 1000000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RRRM2_BRN_GC_ISS-T_OLD_GO13: {'min_max_genes': [822.975, None], 'min_max_cells': [20, None], 'max_mt': 10, 'min_max_counts': [1282.925, 23156.550000000007], 'target_sum': 1000000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RRRM2_BRN_FLT_ISS-T_YNG_FY8: {'min_max_genes': [481.425, None], 'min_max_cells': [20, None], 'max_mt': 10, 'min_max_counts': [681.7, 27907.149999999994], 'target_sum': 1000000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RRRM2_BRN_FLT_ISS-T_YNG_FY7: {'min_max_genes': [331.925, None], 'min_max_cells': [20, None], 'max_mt': 10, 'min_max_counts': [452.77500000000003, 13453.3], 'target_sum': 1000000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RRRM2_BRN_FLT_ISS-T_OLD_FO19: {'min_max_genes': [200, None], 'min_max_cells': [20, None], 'max_mt': 10, 'min_max_counts': [300, 14724.999999999998], 'target_sum': 1000000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RRRM2_BRN_GC_ISS-T_YNG_GY7: {'min_max_genes': [798.75, None], 'min_max_cells': [20, None], 'max_mt': 10, 'min_max_counts': [1176.375, 28050.25], 'target_sum': 1000000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RRRM2_BRN_FLT_ISS-T_OLD_FO14: {'min_max_genes': [719.475, None], 'min_max_cells': [20, None], 'max_mt': 10, 'min_max_counts': [1128.9, 33910.62499999997], 'target_sum': 1000000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RRRM2_BRN_GC_ISS-T_YNG_GY1: {'min_max_genes': [923.4000000000001, None], 'min_max_cells': [20, None], 'max_mt': 10, 'min_max_counts': [1489.7, 37456.77499999999], 'target_sum': 1000000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RRRM2_BRN_FLT_ISS-T_YNG_FY2: {'min_max_genes': [700.2, None], 'min_max_cells': [20, None], 'max_mt': 10, 'min_max_counts': [1087.2, 35338.0], 'target_sum': 1000000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RRRM2_BRN_FLT_ISS-T_OLD_FO17: {'min_max_genes': [316.3, None], 'min_max_cells': [20, None], 'max_mt': 10, 'min_max_counts': [376.3, 11766.900000000009], 'target_sum': 1000000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RRRM2_BRN_GC_ISS-T_OLD_GO16: {'min_max_genes': [201.0, None], 'min_max_cells': [20, None], 'max_mt': 10, 'min_max_counts': [300, 10018.199999999983], 'target_sum': 1000000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RRRM2_BRN_FLT_ISS-T_OLD_FO16: {'min_max_genes': [766.0, None], 'min_max_cells': [20, None], 'max_mt': 10, 'min_max_counts': [1213.2, 36130.25], 'target_sum': 1000000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RRRM2_BRN_GC_ISS-T_YNG_GY2: {'min_max_genes': [507.0, None], 'min_max_cells': [20, None], 'max_mt': 10, 'min_max_counts': [696.45, 23662.899999999972], 'target_sum': 1000000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
RRRM2_BRN_FLT_ISS-T_YNG_FY5: {'min_max_genes': [809.825, None], 'min_max_cells': [20, None], 'max_mt': 10, 'min_max_counts': [1300.3, 32231.825000000015], 'target_sum': 1000000.0, 'zero_center': True, 'max_value': 10, 'n_top_genes': 2000, 'doublet_detection': 'drop'}
| min_max_genes | min_max_cells | max_mt | min_max_counts | target_sum | zero_center | max_value | n_top_genes | doublet_detection | |
|---|---|---|---|---|---|---|---|---|---|
| RRRM2_BRN_GC_ISS-T_YNG_GY4 | [321.0, None] | [20, None] | 10 | [414.0, 11122.175000000005] | 1000000.0 | True | 10 | 2000 | drop |
| RRRM2_BRN_GC_ISS-T_YNG_GY9 | [703.0, None] | [20, None] | 10 | [1006.1500000000001, 23784.40000000001] | 1000000.0 | True | 10 | 2000 | drop |
| RRRM2_BRN_GC_ISS-T_OLD_GO18 | [326.1, None] | [20, None] | 10 | [422.1, 13468.399999999998] | 1000000.0 | True | 10 | 2000 | drop |
| RRRM2_BRN_FLT_ISS-T_OLD_FO20 | [628.0, None] | [20, None] | 10 | [895.0, 19711.0] | 1000000.0 | True | 10 | 2000 | drop |
| RRRM2_BRN_GC_ISS-T_OLD_GO19 | [666.0, None] | [20, None] | 10 | [942.0, 21190.0] | 1000000.0 | True | 10 | 2000 | drop |
| RRRM2_BRN_GC_ISS-T_OLD_GO13 | [822.975, None] | [20, None] | 10 | [1282.925, 23156.550000000007] | 1000000.0 | True | 10 | 2000 | drop |
| RRRM2_BRN_FLT_ISS-T_YNG_FY8 | [481.425, None] | [20, None] | 10 | [681.7, 27907.149999999994] | 1000000.0 | True | 10 | 2000 | drop |
| RRRM2_BRN_FLT_ISS-T_YNG_FY7 | [331.925, None] | [20, None] | 10 | [452.77500000000003, 13453.3] | 1000000.0 | True | 10 | 2000 | drop |
| RRRM2_BRN_FLT_ISS-T_OLD_FO19 | [200, None] | [20, None] | 10 | [300, 14724.999999999998] | 1000000.0 | True | 10 | 2000 | drop |
| RRRM2_BRN_GC_ISS-T_YNG_GY7 | [798.75, None] | [20, None] | 10 | [1176.375, 28050.25] | 1000000.0 | True | 10 | 2000 | drop |
| RRRM2_BRN_FLT_ISS-T_OLD_FO14 | [719.475, None] | [20, None] | 10 | [1128.9, 33910.62499999997] | 1000000.0 | True | 10 | 2000 | drop |
| RRRM2_BRN_GC_ISS-T_YNG_GY1 | [923.4000000000001, None] | [20, None] | 10 | [1489.7, 37456.77499999999] | 1000000.0 | True | 10 | 2000 | drop |
| RRRM2_BRN_FLT_ISS-T_YNG_FY2 | [700.2, None] | [20, None] | 10 | [1087.2, 35338.0] | 1000000.0 | True | 10 | 2000 | drop |
| RRRM2_BRN_FLT_ISS-T_OLD_FO17 | [316.3, None] | [20, None] | 10 | [376.3, 11766.900000000009] | 1000000.0 | True | 10 | 2000 | drop |
| RRRM2_BRN_GC_ISS-T_OLD_GO16 | [201.0, None] | [20, None] | 10 | [300, 10018.199999999983] | 1000000.0 | True | 10 | 2000 | drop |
| RRRM2_BRN_FLT_ISS-T_OLD_FO16 | [766.0, None] | [20, None] | 10 | [1213.2, 36130.25] | 1000000.0 | True | 10 | 2000 | drop |
| RRRM2_BRN_GC_ISS-T_YNG_GY2 | [507.0, None] | [20, None] | 10 | [696.45, 23662.899999999972] | 1000000.0 | True | 10 | 2000 | drop |
| RRRM2_BRN_FLT_ISS-T_YNG_FY5 | [809.825, None] | [20, None] | 10 | [1300.3, 32231.825000000015] | 1000000.0 | True | 10 | 2000 | drop |
Integrate¶
In-Memory Approach¶
See the "On-Disk Approach" for a more sophisticated approach to defining kws_integrate (more options).
# %%time
# # NOTE(review): this entire in-memory cell is commented out, so none of the
# # assignments below (`join_method`, `kws_integrate`, `self`, `pfp`) run here.
# # Integrate
# join_method = "outer" # or "inner"
# kws_integrate = {"kws_pp": kws_pp, "n_top_genes": 10000,
# "fill_value": np.nan,
# "col_batch": None, # suppress using batch as covariate
# "join": join_method, "merge": "unique", "use_rapids": True}
# self = scflow.Rna(adatas, col_sample=col_sample, col_batch=col_batch,
# kws_integrate=kws_integrate)
# self.rna.obs = self.rna.obs.assign(kws_integrate=str(kws_integrate))
# # Write Files for Processed/Integrated Objects?
# if overwrite is True or not os.path.exists(file_new):
# os.makedirs("data", exist_ok=True)
# self.rna.write_h5ad(file_new)
# for x in adatas:
# pfp = os.path.join("data", f"{x}_processed.h5ad")
# if overwrite is True or not os.path.exists(pfp):
# adatas[x].write_h5ad(pfp)
# del adatas # to save memory; now integrated in `self.rna`
# # Display
# print(self.rna)
# # print(self.rna.var.head())
# self.rna.obs
On-Disk Approach¶
Preprocess Individual¶
# Preprocess
# Target path of the processed .h5ad for every sample (new individual files)
files_individual = {
    x: os.path.join("data", f"{x}_processed.h5ad") for x in files}

# Refuse to continue when overwriting is disabled and a processed file is
# already on disk. The check uses the paths built above rather than a
# leftover `pfp` variable, which this cell never assigns.
if overwrite is False and any(
        os.path.exists(path) for path in files_individual.values()):
    raise ValueError("Must be able to overwrite to use on-disk option")

var_names = []  # to store genes not filtered out for each sample
for x in files:  # iterate sample files
    print(f"\n\n{'=' * 80}\n{x}\n{'=' * 80}")
    # Keep the preprocessing settings with the cells they were applied to
    adatas[x].obs.loc[:, f"kws_pp_{col_sample}"] = str(kws_pp[x])  # store kws
    adatas[x] = scflow.pp.preprocess(
        adatas[x], **kws_pp[x], plot_qc=False)  # preprocess data
    var_names.append(set(adatas[x].var_names))  # genes still present

# Decide Join Method
shared_genes = set.intersection(*var_names)  # genes in all after filtering
all_genes = set.union(*var_names)  # genes in any post-filter sample
print(f"{len(shared_genes)} genes present in all samples post-filtering (out"
      f" of {len(all_genes)} total genes present in any post-filter sample)")
# Inner join when at least half of the genes survive in every sample;
# otherwise outer join
join_method = "inner" if len(shared_genes) / len(all_genes) >= 0.5 else \
    "outer"  # outer join if <1/2 of genes shared across all samples
================================================================================
RRRM2_BRN_GC_ISS-T_YNG_GY4
================================================================================
***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 8542 × 56748
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
layers: 'counts'
***Filtering cells by counts...
***Filtering cells by genes...
filtered out 14 cells that have less than 321.0 genes expressed
***Filtering genes by cells...
filtered out 37392 genes that are detected in less than 20 cells
***Filtering cells by mitochondrial gene content...
***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.) ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.15
Detected doublet rate = 5.9%
Estimated detectable doublet fraction = 60.3%
Overall doublet rate:
Expected = 5.0%
Estimated = 9.8%
***Normalizing...
***Detecting highly variable genes...
***Scaling data...
================================================================================
RRRM2_BRN_GC_ISS-T_YNG_GY9
================================================================================
***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 6044 × 56748
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
layers: 'counts'
***Filtering cells by counts...
***Filtering cells by genes...
filtered out 15 cells that have less than 703.0 genes expressed
***Filtering genes by cells...
filtered out 36336 genes that are detected in less than 20 cells
***Filtering cells by mitochondrial gene content...
***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.) ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.19
Detected doublet rate = 4.0%
Estimated detectable doublet fraction = 55.4%
Overall doublet rate:
Expected = 5.0%
Estimated = 7.2%
***Normalizing...
***Detecting highly variable genes...
***Scaling data...
================================================================================
RRRM2_BRN_GC_ISS-T_OLD_GO18
================================================================================
***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 6503 × 56748
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
layers: 'counts'
***Filtering cells by counts...
***Filtering cells by genes...
filtered out 13 cells that have less than 326.1 genes expressed
***Filtering genes by cells...
filtered out 38318 genes that are detected in less than 20 cells
***Filtering cells by mitochondrial gene content...
***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.) ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.16
Detected doublet rate = 4.8%
Estimated detectable doublet fraction = 55.9%
Overall doublet rate:
Expected = 5.0%
Estimated = 8.5%
***Normalizing...
***Detecting highly variable genes...
***Scaling data...
================================================================================
RRRM2_BRN_FLT_ISS-T_OLD_FO20
================================================================================
***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 5641 × 56748
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
layers: 'counts'
***Filtering cells by counts...
***Filtering cells by genes...
filtered out 11 cells that have less than 628.0 genes expressed
***Filtering genes by cells...
filtered out 37308 genes that are detected in less than 20 cells
***Filtering cells by mitochondrial gene content...
***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.) ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.20
Detected doublet rate = 2.8%
Estimated detectable doublet fraction = 54.3%
Overall doublet rate:
Expected = 5.0%
Estimated = 5.2%
***Normalizing...
***Detecting highly variable genes...
***Scaling data...
================================================================================
RRRM2_BRN_GC_ISS-T_OLD_GO19
================================================================================
***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 8476 × 56748
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
layers: 'counts'
***Filtering cells by counts...
***Filtering cells by genes...
filtered out 23 cells that have less than 666.0 genes expressed
***Filtering genes by cells...
filtered out 35414 genes that are detected in less than 20 cells
***Filtering cells by mitochondrial gene content...
***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.) ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.15
Detected doublet rate = 5.6%
Estimated detectable doublet fraction = 66.2%
Overall doublet rate:
Expected = 5.0%
Estimated = 8.5%
***Normalizing...
***Detecting highly variable genes...
***Scaling data...
================================================================================
RRRM2_BRN_GC_ISS-T_OLD_GO13
================================================================================
***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 3880 × 56748
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
layers: 'counts'
***Filtering cells by counts...
***Filtering cells by genes...
filtered out 10 cells that have less than 822.975 genes expressed
***Filtering genes by cells...
filtered out 38134 genes that are detected in less than 20 cells
***Filtering cells by mitochondrial gene content...
***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.) ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.22
Detected doublet rate = 2.1%
Estimated detectable doublet fraction = 52.7%
Overall doublet rate:
Expected = 5.0%
Estimated = 4.0%
***Normalizing...
***Detecting highly variable genes...
***Scaling data...
================================================================================
RRRM2_BRN_FLT_ISS-T_YNG_FY8
================================================================================
***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 5658 × 56748
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
layers: 'counts'
***Filtering cells by counts...
***Filtering cells by genes...
filtered out 5 cells that have less than 481.425 genes expressed
***Filtering genes by cells...
filtered out 36367 genes that are detected in less than 20 cells
***Filtering cells by mitochondrial gene content...
***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.) ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.18
Detected doublet rate = 3.2%
Estimated detectable doublet fraction = 56.2%
Overall doublet rate:
Expected = 5.0%
Estimated = 5.7%
***Normalizing...
***Detecting highly variable genes...
***Scaling data...
================================================================================
RRRM2_BRN_FLT_ISS-T_YNG_FY7
================================================================================
***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 4238 × 56748
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
layers: 'counts'
***Filtering cells by counts...
***Filtering cells by genes...
filtered out 10 cells that have less than 331.925 genes expressed
***Filtering genes by cells...
filtered out 39609 genes that are detected in less than 20 cells
***Filtering cells by mitochondrial gene content...
***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.) ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.14
Detected doublet rate = 5.8%
Estimated detectable doublet fraction = 59.9%
Overall doublet rate:
Expected = 5.0%
Estimated = 9.7%
***Normalizing...
***Detecting highly variable genes...
***Scaling data...
================================================================================
RRRM2_BRN_FLT_ISS-T_OLD_FO19
================================================================================
***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 7265 × 56748
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
layers: 'counts'
***Filtering cells by counts...
***Filtering cells by genes...
filtered out 2 cells that have less than 200 genes expressed
***Filtering genes by cells...
filtered out 38066 genes that are detected in less than 20 cells
***Filtering cells by mitochondrial gene content...
***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.) ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.24
Detected doublet rate = 2.3%
Estimated detectable doublet fraction = 28.4%
Overall doublet rate:
Expected = 5.0%
Estimated = 8.2%
***Normalizing...
***Detecting highly variable genes...
***Scaling data...
================================================================================
RRRM2_BRN_GC_ISS-T_YNG_GY7
================================================================================
***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 5776 × 56748
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
layers: 'counts'
***Filtering cells by counts...
***Filtering cells by genes...
filtered out 18 cells that have less than 798.75 genes expressed
***Filtering genes by cells...
filtered out 36306 genes that are detected in less than 20 cells
***Filtering cells by mitochondrial gene content...
***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.) ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.18
Detected doublet rate = 3.5%
Estimated detectable doublet fraction = 55.2%
Overall doublet rate:
Expected = 5.0%
Estimated = 6.4%
***Normalizing...
***Detecting highly variable genes...
***Scaling data...
================================================================================
RRRM2_BRN_FLT_ISS-T_OLD_FO14
================================================================================
***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 4260 × 56748
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
layers: 'counts'
***Filtering cells by counts...
***Filtering cells by genes...
filtered out 13 cells that have less than 719.475 genes expressed
***Filtering genes by cells...
filtered out 36635 genes that are detected in less than 20 cells
***Filtering cells by mitochondrial gene content...
***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.) ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.22
Detected doublet rate = 1.8%
Estimated detectable doublet fraction = 47.7%
Overall doublet rate:
Expected = 5.0%
Estimated = 3.8%
***Normalizing...
***Detecting highly variable genes...
***Scaling data...
================================================================================
RRRM2_BRN_GC_ISS-T_YNG_GY1
================================================================================
***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 3968 × 56748
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
layers: 'counts'
***Filtering cells by counts...
***Filtering cells by genes...
filtered out 5 cells that have less than 923.4000000000001 genes expressed
***Filtering genes by cells...
filtered out 36091 genes that are detected in less than 20 cells
***Filtering cells by mitochondrial gene content...
***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.) ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.21
Detected doublet rate = 2.1%
Estimated detectable doublet fraction = 43.2%
Overall doublet rate:
Expected = 5.0%
Estimated = 4.9%
***Normalizing...
***Detecting highly variable genes...
***Scaling data...
================================================================================
RRRM2_BRN_FLT_ISS-T_YNG_FY2
================================================================================
***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 4125 × 56748
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
layers: 'counts'
***Filtering cells by counts...
***Filtering cells by genes...
filtered out 8 cells that have less than 700.2 genes expressed
***Filtering genes by cells...
filtered out 36646 genes that are detected in less than 20 cells
***Filtering cells by mitochondrial gene content...
***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.) ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.21
Detected doublet rate = 1.9%
Estimated detectable doublet fraction = 46.8%
Overall doublet rate:
Expected = 5.0%
Estimated = 4.0%
***Normalizing...
***Detecting highly variable genes...
***Scaling data...
================================================================================
RRRM2_BRN_FLT_ISS-T_OLD_FO17
================================================================================
***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 1493 × 56748
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
layers: 'counts'
***Filtering cells by counts...
***Filtering cells by genes...
filtered out 8 cells that have less than 316.3 genes expressed
***Filtering genes by cells...
filtered out 44446 genes that are detected in less than 20 cells
***Filtering cells by mitochondrial gene content...
***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.) ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.20
Detected doublet rate = 2.9%
Estimated detectable doublet fraction = 30.6%
Overall doublet rate:
Expected = 5.0%
Estimated = 9.4%
***Normalizing...
***Detecting highly variable genes...
***Scaling data...
================================================================================
RRRM2_BRN_GC_ISS-T_OLD_GO16
================================================================================
***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 20000 × 56748
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
layers: 'counts'
***Filtering cells by counts...
***Filtering cells by genes...
***Filtering genes by cells...
filtered out 36883 genes that are detected in less than 20 cells
***Filtering cells by mitochondrial gene content...
***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.) ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.43
Detected doublet rate = 0.3%
Estimated detectable doublet fraction = 7.4%
Overall doublet rate:
Expected = 5.0%
Estimated = 4.2%
***Normalizing...
***Detecting highly variable genes...
***Scaling data...
================================================================================
RRRM2_BRN_FLT_ISS-T_OLD_FO16
================================================================================
***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 4863 × 56748
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
layers: 'counts'
***Filtering cells by counts...
***Filtering cells by genes...
filtered out 14 cells that have less than 766.0 genes expressed
***Filtering genes by cells...
filtered out 36357 genes that are detected in less than 20 cells
***Filtering cells by mitochondrial gene content...
***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.) ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.21
Detected doublet rate = 2.3%
Estimated detectable doublet fraction = 43.5%
Overall doublet rate:
Expected = 5.0%
Estimated = 5.3%
***Normalizing...
***Detecting highly variable genes...
***Scaling data...
================================================================================
RRRM2_BRN_GC_ISS-T_YNG_GY2
================================================================================
***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 10779 × 56748
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
layers: 'counts'
***Filtering cells by counts...
***Filtering cells by genes...
filtered out 14 cells that have less than 507.0 genes expressed
***Filtering genes by cells...
filtered out 34632 genes that are detected in less than 20 cells
***Filtering cells by mitochondrial gene content...
***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.) ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.17
Detected doublet rate = 5.1%
Estimated detectable doublet fraction = 50.9%
Overall doublet rate:
Expected = 5.0%
Estimated = 9.9%
***Normalizing...
***Detecting highly variable genes...
***Scaling data...
================================================================================
RRRM2_BRN_FLT_ISS-T_YNG_FY5
================================================================================
***Activating layer 'counts'...
AnnData object with n_obs × n_vars = 4714 × 56748
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
layers: 'counts'
***Filtering cells by counts...
***Filtering cells by genes...
filtered out 21 cells that have less than 809.825 genes expressed
***Filtering genes by cells...
filtered out 36239 genes that are detected in less than 20 cells
***Filtering cells by mitochondrial gene content...
***Performing doublet detection...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/cuml/internals/api_decorators.py:216: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.) ret = func(*args, **kwargs)
Automatically set threshold at doublet score = 0.18
Detected doublet rate = 3.1%
Estimated detectable doublet fraction = 54.1%
Overall doublet rate:
Expected = 5.0%
Estimated = 5.7%
***Normalizing...
***Detecting highly variable genes...
***Scaling data...
12209 genes present in all samples post-filtering (out of 23329 total genes present in any post-filter sample)
Cluster Individual¶
Generalizable¶
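# %%time
# NOTE(review): this generalizable cell is disabled and would not run as
# written: `self.rna` is undefined at notebook scope, `resolution_individual`
# and `min_dist_individual` are scalars but are indexed by sample, and
# `resolution`/`min_dist` are passed to `scflow.pp.cluster` both explicitly
# and again through `**kws_cl`. Fix these before re-enabling.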
# # Clustering Parameter Options (Only Applied If `kws_cluster` != None)
# resolution_individual, min_dist_individual = 0.15, 1.5
# col_cluster_individual = "annotation_by_markers_individual"
# unlabeled_cat = "Heterogeneous" # if can't find one best-fit cell label
# cci_scanvi = col_cluster_individual + "_heterogeneous_collapsed"
# sep = " | " # separator for heterogeneous annotations
# # Preprocessing & (Optionally) Clustering
# if overwrite is False and os.path.exists(pfp):
# raise ValueError("Must be able to overwrite to use on-disk option")
# # del adatas
# for x in files: # iterate sample files
# if kws_cluster is not None: # cluster individual sample?
# kws_cl = {"resolution": resolution_individual[x],
# "min_dist": min_dist_individual[x], **kws_cluster}
# self.rna.obs.loc[:, "kws_cluster_individual"] = str(kws_cl)
# adatas[x] = scflow.pp.cluster(
# adatas[x], resolution=resolution_individual,
# min_dist=min_dist_individual, plot=False,
# col_celltype="leiden_individual", **kws_cl) # cluster
# sc.tl.rank_genes_groups(
# adatas[x], "leiden_individual", n_genes=None, rankby_abs=False,
# key_added="rank_genes_groups_leiden_individual",
# copy=False) # find markers/DEGs
# if markers_predefined is not None: # markers specified?
# _, adatas[x] = scflow.pp.annotate_by_marker_overlap(
# adatas[x], markers_predefined,
# col_celltype="leiden_individual",
# col_celltype_new=col_cluster_individual, sep=sep,
# celltypes_superhierarchical=cts_superhierarchical,
# # top_n_markers=20,
# adj_pval_threshold=1e-10,
# method="overlap_coef", inplace=True) # annotate by markers
# if rename_marker_based_annotation is not None:
# adatas[x].obs.loc[:, col_cluster_individual] = adatas[
# x].obs[col_cluster_individual].replace(
# rename_marker_based_annotation) # re-name
# adatas[x].obs.loc[:, cci_scanvi] = adatas[x].obs[
# col_cluster_individual].apply(lambda x: unlabeled_cat if (
# sep in x) else x) # re-label heterogeneous annotations
# var_names += [set(adatas[x].var_names)] # track what genes still there
# # Write Objects
# for x in adatas:
# print(f"\n\n{'=' * 80}\n{x}\n{'=' * 80}\n\n{adatas[x]}\n")
# adatas[x].write_h5ad(files_individual[x]) # write individual file
# # Plot
# if kws_cluster is not None:
# for x in files: # iterate sample files
# ccs = [v for v in ["leiden_individual", col_cluster_individual
# ] if v in adatas[x].obs] # columns for UMAP
# print(f"\n\n{'=' * 80}\n{x}\n{'=' * 80}")
# sc.pl.umap(adatas[x], color=ccs, wspace=0.4) # plot UMAP
# del adatas # save memory
OSD-613-Specific Tweaks¶
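Iterate over combinations of clustering parameters (Leiden resolution and UMAP minimum distance) for each sample until its marker-based annotation contains every required cell type, no prohibited cell type, and no heterogeneous (multi-label) clusters
Make sample-specific alterations to the annotation of the one sample (RRRM2_BRN_GC_ISS-T_YNG_GY4) for which no parameter combination yields a valid annotation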
# Per-sample clustering and marker-based annotation (OSD-613-specific).
#
# For every sample, try (resolution, min_dist) combinations in order until the
# marker-based annotation (1) contains every cell type in `req_cts`,
# (2) contains none of `prohib_cts`, and (3) has no heterogeneous (multi-label)
# cluster. The first passing combination is copied into the canonical columns
# `leiden_individual`, `col_cluster_individual`, and `kws_cluster_individual`.
# One sample is then annotated by hand, and all objects are written and plotted.
#
# Relies on names defined in earlier cells: `files`, `adatas`,
# `files_individual`, `mks_collapsed`, `cts_superhierarchical`,
# `rename_marker_based_annotation`, `scflow`, and `sc`.
from itertools import product

# Clustering Parameter Options
req_cts = ["Neuron", "OPC", "Oligodendrocyte", "Astrocyte", "Microglial"]
prohib_cts = ["Excitatory-Inhibitory"]
resolution_individual, min_dist_individual = 0.2, 1.5  # not read in this cell
resn_list = [0.2, 0.5, 0.1, 0.075, 0.15]  # resolutions, in search order
dist_list = [1, 0.75, 0.5, 1.5, 0.3]  # minimum distances, in search order
col_cluster_individual = "annotation_by_markers_individual"
unlabeled_cat = "Heterogeneous"  # if can't find one best-fit cell label
cci_scanvi = col_cluster_individual + "_heterogeneous_collapsed"
sep = " | "  # separator for heterogeneous annotations


def _collapse_heterogeneous(label):
    """Return `unlabeled_cat` for a multi-label annotation, else the label."""
    return unlabeled_cat if sep in label else label


# Clustering
# NOTE(review): the logged output shows every `scflow.pp.cluster` call
# recomputing PCA and neighbors, with `min_dist` reported only for the UMAP
# step -- confirm whether varying `min_dist` can change the Leiden clusters.
for x in files:  # iterate sample files
    valid_cts = False
    print(f"\n\n{'=' * 80}\n{x}\n{'=' * 80}")
    # `product` visits all distances for one resolution before moving to the
    # next resolution, i.e., the same order as nested loops
    for r, t in product(resn_list, dist_list):
        c_i = f"leiden_individual_res{r}dist{t}"
        cai = f"{col_cluster_individual}_res{r}dist{t}"
        kws_cl = {"resolution": r, "min_dist": t}
        adatas[x] = scflow.pp.cluster(
            adatas[x], plot=False, col_celltype=c_i, **kws_cl)
        sc.tl.rank_genes_groups(
            adatas[x], c_i, n_genes=None,
            key_added=f"rank_genes_groups_{c_i}",
            copy=False)  # find markers/DEGs
        _, adatas[x] = scflow.pp.annotate_by_marker_overlap(
            adatas[x], mks_collapsed,
            col_celltype=c_i, col_celltype_new=cai, sep=sep,
            celltypes_superhierarchical=cts_superhierarchical,
            # top_n_markers=20,
            adj_pval_threshold=1e-15,
            method="overlap_count", inplace=True)  # annotate
        if rename_marker_based_annotation is not None:
            adatas[x].obs.loc[:, cai] = adatas[x].obs[cai].replace(
                rename_marker_based_annotation)  # re-name
        labels = adatas[x].obs[cai].unique()
        print(labels)
        adatas[x].obs.loc[:, cci_scanvi] = adatas[x].obs[cai].apply(
            _collapse_heterogeneous)  # re-label heterogeneous annotations
        found = set(labels)  # compute the unique labels only once
        has_heterogeneous = (
            adatas[x].obs[cci_scanvi] == unlabeled_cat).any()
        valid_cts = (all(q in found for q in req_cts)
                     and not any(q in found for q in prohib_cts)
                     and not has_heterogeneous)
        if valid_cts:  # adopt this combination & stop searching
            adatas[x].obs.loc[:, "leiden_individual"] = adatas[x].obs[c_i]
            adatas[x].obs.loc[:, col_cluster_individual] = adatas[
                x].obs[cai]
            adatas[x].obs.loc[:, "kws_cluster_individual"] = str(kws_cl)
            print(kws_cl)
            break

# Detect Samples with No Valid Clustering Scheme
# (the canonical columns exist only if some combination passed)
no_valid = [x for x in files if not all(
    i in adatas[x].obs for i in [
        "leiden_individual", col_cluster_individual])]
for x in no_valid:
    print(f"No valid clustering found for {x}")
print(f"No Valid: {no_valid}")

# Individual Tweak
samp = "RRRM2_BRN_GC_ISS-T_YNG_GY4"
cols_tries = [i for i in adatas[samp].obs if "leiden_" in i]
# NOTE(review): positional pick; with the search order above and no other
# "leiden_" columns this is presumably "leiden_individual_res0.1dist0.3" --
# confirm, and consider naming the column explicitly
cts_try = cols_tries[14]
_, adatas[samp] = scflow.pp.annotate_by_marker_overlap(
    adatas[samp], mks_collapsed, col_celltype=cts_try,
    col_celltype_new=col_cluster_individual + "_new", sep=sep,
    adj_pval_threshold=1e-5,
    method="overlap_coef", inplace=True)  # annotate by markers
adatas[samp].obs.loc[:, "leiden_individual"] = adatas[samp].obs[cts_try]
adatas[samp].obs.loc[:, col_cluster_individual] = adatas[samp].obs[
    col_cluster_individual + "_new"]
# Recover the parameters from the column name ("...res<r>dist<t>")
params_try = cts_try.split("res")[1].split("dist")
adatas[samp].obs["kws_cluster_individual"] = str({
    "resolution": float(params_try[0]), "min_dist": float(params_try[1])})
# Keep the collapsed column in sync with the annotation just adopted;
# otherwise it retains labels from the last combination tried in the search
adatas[samp].obs[cci_scanvi] = adatas[samp].obs[
    col_cluster_individual].apply(_collapse_heterogeneous)

# Write Objects
for x in adatas:
    print(f"\n\n{'=' * 80}\n{x}\n{'=' * 80}\n\n{adatas[x]}\n")
    adatas[x].write_h5ad(files_individual[x])  # write individual file

# Print Parameters Used
for x in adatas:
    if "kws_cluster_individual" in adatas[x].obs:
        print(adatas[x].obs["kws_cluster_individual"])
    else:  # sample listed in `no_valid` and not tweaked by hand
        print(f"No clustering parameters recorded for {x}")

# Plot
for x in adatas:
    ccs = [v for v in ["leiden_individual", col_cluster_individual
                       ] if v in adatas[x].obs]  # columns for UMAP
    sc.pl.umap(adatas[x], color=ccs or None, wspace=0.4)  # plot UMAP
del adatas  # save memory
================================================================================
RRRM2_BRN_GC_ISS-T_YNG_GY4
================================================================================
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.2...
2025-08-27 12:30:21 | [INFO] init
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'OPC', 'Neuron | Endothelial', 'Oligodendrocyte', 'Astrocyte', 'Microglial']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.75...
***Performing Leiden clustering with resolution 0.2...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'OPC', 'Neuron | Endothelial', 'Oligodendrocyte', 'Microglial', 'Astrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.5...
***Performing Leiden clustering with resolution 0.2...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'Neuron | Endothelial', 'Oligodendrocyte', 'OPC', 'Microglial', 'Astrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1.5...
***Performing Leiden clustering with resolution 0.2...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'Neuron | Endothelial', 'OPC', 'Oligodendrocyte', 'Astrocyte', 'Microglial']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.3...
***Performing Leiden clustering with resolution 0.2...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'Neuron | Endothelial', 'OPC', 'Oligodendrocyte', 'Astrocyte', 'Microglial']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.5...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'OPC', 'Microglial', 'Neuron | Endothelial', 'Oligodendrocyte', 'Astrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.75...
***Performing Leiden clustering with resolution 0.5...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'OPC', 'Microglial', 'Neuron | Endothelial', 'Oligodendrocyte', 'Astrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.5...
***Performing Leiden clustering with resolution 0.5...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'OPC', 'Microglial', 'Neuron | Endothelial', 'Oligodendrocyte', 'Astrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1.5...
***Performing Leiden clustering with resolution 0.5...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'OPC', 'Microglial', 'Neuron | Endothelial', 'Oligodendrocyte', 'Astrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.3...
***Performing Leiden clustering with resolution 0.5...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'OPC', 'Microglial', 'Neuron | Endothelial', 'Oligodendrocyte', 'Astrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.1...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'OPC', 'Neuron | Endothelial', 'Oligodendrocyte', 'Microglial', 'Astrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.75...
***Performing Leiden clustering with resolution 0.1...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'OPC', 'Neuron | Endothelial', 'Oligodendrocyte', 'Microglial', 'Astrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.5...
***Performing Leiden clustering with resolution 0.1...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'Neuron | Endothelial', 'Oligodendrocyte', 'Astrocyte', 'Microglial', 'OPC']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1.5...
***Performing Leiden clustering with resolution 0.1...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'Neuron | Endothelial', 'Oligodendrocyte', 'Astrocyte', 'Microglial', 'OPC']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.3...
***Performing Leiden clustering with resolution 0.1...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'Neuron | Endothelial', 'Oligodendrocyte', 'OPC', 'Microglial', 'Astrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.075...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'Neuron | Endothelial', 'Microglial', 'Oligodendrocyte', 'OPC', 'Astrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.75...
***Performing Leiden clustering with resolution 0.075...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'Neuron | Endothelial', 'Microglial', 'OPC', 'Oligodendrocyte', 'Astrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.5...
***Performing Leiden clustering with resolution 0.075...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'Neuron | Endothelial', 'Oligodendrocyte', 'OPC', 'Microglial', 'Astrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1.5...
***Performing Leiden clustering with resolution 0.075...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'OPC', 'Neuron | Endothelial', 'Oligodendrocyte', 'Microglial', 'Astrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.3...
***Performing Leiden clustering with resolution 0.075...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'Neuron | Endothelial', 'Oligodendrocyte', 'OPC', 'Microglial', 'Astrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.15...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'Neuron | Endothelial', 'Oligodendrocyte', 'OPC', 'Microglial', 'Astrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.75...
***Performing Leiden clustering with resolution 0.15...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'Neuron | Endothelial', 'Oligodendrocyte', 'OPC', 'Microglial', 'Astrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.5...
***Performing Leiden clustering with resolution 0.15...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'Neuron | Endothelial', 'OPC', 'Oligodendrocyte', 'Microglial', 'Astrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1.5...
***Performing Leiden clustering with resolution 0.15...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'Neuron | Endothelial', 'Oligodendrocyte', 'OPC', 'Astrocyte', 'Microglial']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.3...
***Performing Leiden clustering with resolution 0.15...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Endothelial', 'Astrocyte', 'Neuron | Endothelial']
Categories (7, object): ['Neuron', 'Endothelial', 'Neuron | Endothelial', 'OPC', 'Oligodendrocyte', 'Astrocyte', 'Microglial']
================================================================================
RRRM2_BRN_GC_ISS-T_YNG_GY9
================================================================================
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.2...
['Neuron', 'Microglial', 'Oligodendrocyte', 'OPC', 'Astrocyte', 'Endothelial']
Categories (6, object): ['Neuron', 'Endothelial', 'Microglial', 'Oligodendrocyte', 'Astrocyte', 'OPC']
{'resolution': 0.2, 'min_dist': 1}
================================================================================
RRRM2_BRN_GC_ISS-T_OLD_GO18
================================================================================
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.2...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'Microglial', 'Endothelial', 'OPC']
Categories (6, object): ['Neuron', 'Endothelial', 'Microglial', 'Oligodendrocyte', 'Astrocyte', 'OPC']
{'resolution': 0.2, 'min_dist': 1}
================================================================================
RRRM2_BRN_FLT_ISS-T_OLD_FO20
================================================================================
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.2...
['Neuron', 'Oligodendrocyte', 'Microglial', 'OPC', 'Astrocyte', 'Endothelial']
Categories (6, object): ['Neuron', 'Endothelial', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Microglial']
{'resolution': 0.2, 'min_dist': 1}
================================================================================
RRRM2_BRN_GC_ISS-T_OLD_GO19
================================================================================
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.2...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial']
Categories (5, object): ['Neuron', 'Endothelial', 'Astrocyte', 'OPC', 'Oligodendrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.75...
***Performing Leiden clustering with resolution 0.2...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial']
Categories (5, object): ['Neuron', 'Endothelial', 'Astrocyte', 'OPC', 'Oligodendrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.5...
***Performing Leiden clustering with resolution 0.2...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial']
Categories (5, object): ['Neuron', 'Endothelial', 'Astrocyte', 'OPC', 'Oligodendrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1.5...
***Performing Leiden clustering with resolution 0.2...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial']
Categories (5, object): ['Neuron', 'Endothelial', 'Astrocyte', 'OPC', 'Oligodendrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.3...
***Performing Leiden clustering with resolution 0.2...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial']
Categories (5, object): ['Neuron', 'Endothelial', 'Astrocyte', 'OPC', 'Oligodendrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.5...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial']
Categories (5, object): ['Neuron', 'Endothelial', 'Astrocyte', 'OPC', 'Oligodendrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.75...
***Performing Leiden clustering with resolution 0.5...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial']
Categories (5, object): ['Neuron', 'Endothelial', 'Astrocyte', 'OPC', 'Oligodendrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.5...
***Performing Leiden clustering with resolution 0.5...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial']
Categories (5, object): ['Neuron', 'Endothelial', 'Astrocyte', 'OPC', 'Oligodendrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1.5...
***Performing Leiden clustering with resolution 0.5...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial']
Categories (5, object): ['Neuron', 'Endothelial', 'Astrocyte', 'OPC', 'Oligodendrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.3...
***Performing Leiden clustering with resolution 0.5...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial']
Categories (5, object): ['Neuron', 'Endothelial', 'Astrocyte', 'OPC', 'Oligodendrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.1...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial']
Categories (5, object): ['Neuron', 'Endothelial', 'Astrocyte', 'OPC', 'Oligodendrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.75...
***Performing Leiden clustering with resolution 0.1...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial']
Categories (5, object): ['Neuron', 'Endothelial', 'Astrocyte', 'OPC', 'Oligodendrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.5...
***Performing Leiden clustering with resolution 0.1...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial']
Categories (5, object): ['Neuron', 'Endothelial', 'Astrocyte', 'OPC', 'Oligodendrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1.5...
***Performing Leiden clustering with resolution 0.1...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial']
Categories (5, object): ['Neuron', 'Endothelial', 'Astrocyte', 'OPC', 'Oligodendrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.3...
***Performing Leiden clustering with resolution 0.1...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial']
Categories (5, object): ['Neuron', 'Endothelial', 'Astrocyte', 'OPC', 'Oligodendrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.075...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial']
Categories (5, object): ['Neuron', 'Astrocyte', 'Endothelial', 'OPC', 'Oligodendrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.75...
***Performing Leiden clustering with resolution 0.075...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial']
Categories (5, object): ['Neuron', 'Endothelial', 'OPC', 'Astrocyte', 'Oligodendrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.5...
***Performing Leiden clustering with resolution 0.075...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial']
Categories (5, object): ['Neuron', 'Astrocyte', 'Endothelial', 'OPC', 'Oligodendrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1.5...
***Performing Leiden clustering with resolution 0.075...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial']
Categories (5, object): ['Neuron', 'Endothelial', 'OPC', 'Oligodendrocyte', 'Astrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.3...
***Performing Leiden clustering with resolution 0.075...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial']
Categories (5, object): ['Neuron', 'Endothelial', 'OPC', 'Astrocyte', 'Oligodendrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.15...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial']
Categories (5, object): ['Neuron', 'Endothelial', 'Astrocyte', 'OPC', 'Oligodendrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.75...
***Performing Leiden clustering with resolution 0.15...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial']
Categories (5, object): ['Neuron', 'Endothelial', 'Astrocyte', 'OPC', 'Oligodendrocyte']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.5...
***Performing Leiden clustering with resolution 0.15...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial', 'Microglial']
Categories (6, object): ['Neuron', 'Astrocyte', 'OPC', 'Microglial', 'Oligodendrocyte', 'Endothelial']
{'resolution': 0.15, 'min_dist': 0.5}
================================================================================
RRRM2_BRN_GC_ISS-T_OLD_GO13
================================================================================
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.2...
['Oligodendrocyte', 'Neuron', 'Endothelial', 'Astrocyte', 'OPC', 'Microglial']
Categories (6, object): ['Neuron', 'Astrocyte', 'Endothelial', 'Oligodendrocyte', 'OPC', 'Microglial']
{'resolution': 0.2, 'min_dist': 1}
================================================================================
RRRM2_BRN_FLT_ISS-T_YNG_FY8
================================================================================
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.2...
['Neuron', 'Oligodendrocyte', 'Microglial', 'Astrocyte', 'OPC']
Categories (5, object): ['Neuron', 'Astrocyte', 'OPC', 'Oligodendrocyte', 'Microglial']
{'resolution': 0.2, 'min_dist': 1}
================================================================================
RRRM2_BRN_FLT_ISS-T_YNG_FY7
================================================================================
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.2...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'Microglial', 'OPC', 'Endothelial']
Categories (6, object): ['Neuron', 'Microglial', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Endothelial']
{'resolution': 0.2, 'min_dist': 1}
================================================================================
RRRM2_BRN_FLT_ISS-T_OLD_FO19
================================================================================
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.2...
['Neuron', 'Endothelial', 'Astrocyte', 'Oligodendrocyte', 'Microglial', 'OPC']
Categories (6, object): ['Endothelial', 'Oligodendrocyte', 'Neuron', 'Microglial', 'Astrocyte', 'OPC']
{'resolution': 0.2, 'min_dist': 1}
================================================================================
RRRM2_BRN_GC_ISS-T_YNG_GY7
================================================================================
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.2...
/home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/scanpy/tools/_rank_genes_groups.py:456: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, "names"] = self.var_names[global_indices] /home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/scanpy/tools/_rank_genes_groups.py:458: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, "scores"] = scores[global_indices] /home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/scanpy/tools/_rank_genes_groups.py:461: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, "pvals"] = pvals[global_indices] /home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/scanpy/tools/_rank_genes_groups.py:471: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, "pvals_adj"] = pvals_adj[global_indices] /home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/scanpy/tools/_rank_genes_groups.py:482: PerformanceWarning: DataFrame is highly fragmented. 
This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, "logfoldchanges"] = np.log2( /home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/scanpy/tools/_rank_genes_groups.py:456: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, "names"] = self.var_names[global_indices] /home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/scanpy/tools/_rank_genes_groups.py:458: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, "scores"] = scores[global_indices] /home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/scanpy/tools/_rank_genes_groups.py:461: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, "pvals"] = pvals[global_indices] /home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/scanpy/tools/_rank_genes_groups.py:471: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. 
To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, "pvals_adj"] = pvals_adj[global_indices] /home/easlinger/miniconda3/envs/rsc/lib/python3.13/site-packages/scanpy/tools/_rank_genes_groups.py:482: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, "logfoldchanges"] = np.log2(
['Neuron', 'Oligodendrocyte', 'Microglial', 'Astrocyte', 'OPC', 'Endothelial']
Categories (6, object): ['Neuron', 'Astrocyte', 'Endothelial', 'Oligodendrocyte', 'Microglial', 'OPC']
{'resolution': 0.2, 'min_dist': 1}
================================================================================
RRRM2_BRN_FLT_ISS-T_OLD_FO14
================================================================================
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.2...
['Neuron', 'Astrocyte', 'OPC', 'Oligodendrocyte', 'Microglial', 'Endothelial']
Categories (6, object): ['Neuron', 'Microglial', 'Endothelial', 'Oligodendrocyte', 'Astrocyte', 'OPC']
{'resolution': 0.2, 'min_dist': 1}
================================================================================
RRRM2_BRN_GC_ISS-T_YNG_GY1
================================================================================
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.2...
['Oligodendrocyte', 'Neuron', 'Microglial', 'Astrocyte', 'OPC', 'Endothelial']
Categories (6, object): ['Neuron', 'Endothelial', 'Microglial', 'OPC', 'Oligodendrocyte', 'Astrocyte']
{'resolution': 0.2, 'min_dist': 1}
================================================================================
RRRM2_BRN_FLT_ISS-T_YNG_FY2
================================================================================
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.2...
['Oligodendrocyte', 'Neuron', 'Microglial', 'Astrocyte', 'OPC']
Categories (5, object): ['Neuron', 'Astrocyte', 'Oligodendrocyte', 'Microglial', 'OPC']
{'resolution': 0.2, 'min_dist': 1}
================================================================================
RRRM2_BRN_FLT_ISS-T_OLD_FO17
================================================================================
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.2...
['Neuron', 'Microglial', 'Oligodendrocyte', 'OPC', 'Astrocyte']
Categories (5, object): ['Neuron', 'Microglial', 'Astrocyte', 'Oligodendrocyte', 'OPC']
{'resolution': 0.2, 'min_dist': 1}
================================================================================
RRRM2_BRN_GC_ISS-T_OLD_GO16
================================================================================
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.2...
['Endothelial', 'Neuron', 'Oligodendrocyte', 'Astrocyte', 'Microglial']
Categories (5, object): ['Endothelial', 'Neuron', 'Oligodendrocyte', 'Astrocyte', 'Microglial']
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 0.75...
***Performing Leiden clustering with resolution 0.2...
['Endothelial', 'OPC', 'Neuron', 'Oligodendrocyte', 'Astrocyte', 'Microglial']
Categories (6, object): ['Endothelial', 'Neuron', 'Oligodendrocyte', 'Astrocyte', 'Microglial', 'OPC']
{'resolution': 0.2, 'min_dist': 0.75}
================================================================================
RRRM2_BRN_FLT_ISS-T_OLD_FO16
================================================================================
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.2...
['Neuron', 'Oligodendrocyte', 'Microglial', 'Astrocyte', 'OPC']
Categories (5, object): ['Neuron', 'Astrocyte', 'OPC', 'Oligodendrocyte', 'Microglial']
{'resolution': 0.2, 'min_dist': 1}
================================================================================
RRRM2_BRN_GC_ISS-T_YNG_GY2
================================================================================
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.2...
['Astrocyte', 'Microglial', 'Neuron', 'OPC', 'Oligodendrocyte', 'Endothelial']
Categories (6, object): ['Neuron', 'Microglial', 'OPC', 'Astrocyte', 'Oligodendrocyte', 'Endothelial']
{'resolution': 0.2, 'min_dist': 1}
================================================================================
RRRM2_BRN_FLT_ISS-T_YNG_FY5
================================================================================
***Calculating PCA with None components...
***Building neighborhood...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.2...
['Neuron', 'Oligodendrocyte', 'Astrocyte', 'OPC', 'Microglial', 'Endothelial']
Categories (6, object): ['Neuron', 'Endothelial', 'OPC', 'Astrocyte', 'Oligodendrocyte', 'Microglial']
{'resolution': 0.2, 'min_dist': 1}
No valid clustering found for RRRM2_BRN_GC_ISS-T_YNG_GY4
No Valid: ['RRRM2_BRN_GC_ISS-T_YNG_GY4']
================================================================================
RRRM2_BRN_GC_ISS-T_YNG_GY4
================================================================================
AnnData object with n_obs × n_vars = 7547 × 19356
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1', 'annotation_by_markers_individual_res0.2dist1', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual_res0.2dist0.75', 'annotation_by_markers_individual_res0.2dist0.75', 'leiden_individual_res0.2dist0.5', 'annotation_by_markers_individual_res0.2dist0.5', 'leiden_individual_res0.2dist1.5', 
'annotation_by_markers_individual_res0.2dist1.5', 'leiden_individual_res0.2dist0.3', 'annotation_by_markers_individual_res0.2dist0.3', 'leiden_individual_res0.5dist1', 'annotation_by_markers_individual_res0.5dist1', 'leiden_individual_res0.5dist0.75', 'annotation_by_markers_individual_res0.5dist0.75', 'leiden_individual_res0.5dist0.5', 'annotation_by_markers_individual_res0.5dist0.5', 'leiden_individual_res0.5dist1.5', 'annotation_by_markers_individual_res0.5dist1.5', 'leiden_individual_res0.5dist0.3', 'annotation_by_markers_individual_res0.5dist0.3', 'leiden_individual_res0.1dist1', 'annotation_by_markers_individual_res0.1dist1', 'leiden_individual_res0.1dist0.75', 'annotation_by_markers_individual_res0.1dist0.75', 'leiden_individual_res0.1dist0.5', 'annotation_by_markers_individual_res0.1dist0.5', 'leiden_individual_res0.1dist1.5', 'annotation_by_markers_individual_res0.1dist1.5', 'leiden_individual_res0.1dist0.3', 'annotation_by_markers_individual_res0.1dist0.3', 'leiden_individual_res0.075dist1', 'annotation_by_markers_individual_res0.075dist1', 'leiden_individual_res0.075dist0.75', 'annotation_by_markers_individual_res0.075dist0.75', 'leiden_individual_res0.075dist0.5', 'annotation_by_markers_individual_res0.075dist0.5', 'leiden_individual_res0.075dist1.5', 'annotation_by_markers_individual_res0.075dist1.5', 'leiden_individual_res0.075dist0.3', 'annotation_by_markers_individual_res0.075dist0.3', 'leiden_individual_res0.15dist1', 'annotation_by_markers_individual_res0.15dist1', 'leiden_individual_res0.15dist0.75', 'annotation_by_markers_individual_res0.15dist0.75', 'leiden_individual_res0.15dist0.5', 'annotation_by_markers_individual_res0.15dist0.5', 'leiden_individual_res0.15dist1.5', 'annotation_by_markers_individual_res0.15dist1.5', 'leiden_individual_res0.15dist0.3', 'annotation_by_markers_individual_res0.15dist0.3', 'annotation_by_markers_individual_new', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1', 'rank_genes_groups_leiden_individual_res0.2dist1', 'leiden_individual_res0.2dist0.75', 'rank_genes_groups_leiden_individual_res0.2dist0.75', 'leiden_individual_res0.2dist0.5', 'rank_genes_groups_leiden_individual_res0.2dist0.5', 'leiden_individual_res0.2dist1.5', 'rank_genes_groups_leiden_individual_res0.2dist1.5', 'leiden_individual_res0.2dist0.3', 'rank_genes_groups_leiden_individual_res0.2dist0.3', 'leiden_individual_res0.5dist1', 'rank_genes_groups_leiden_individual_res0.5dist1', 'leiden_individual_res0.5dist0.75', 'rank_genes_groups_leiden_individual_res0.5dist0.75', 'leiden_individual_res0.5dist0.5', 'rank_genes_groups_leiden_individual_res0.5dist0.5', 'leiden_individual_res0.5dist1.5', 'rank_genes_groups_leiden_individual_res0.5dist1.5', 'leiden_individual_res0.5dist0.3', 'rank_genes_groups_leiden_individual_res0.5dist0.3', 'leiden_individual_res0.1dist1', 'rank_genes_groups_leiden_individual_res0.1dist1', 'leiden_individual_res0.1dist0.75', 'rank_genes_groups_leiden_individual_res0.1dist0.75', 'leiden_individual_res0.1dist0.5', 'rank_genes_groups_leiden_individual_res0.1dist0.5', 'leiden_individual_res0.1dist1.5', 'rank_genes_groups_leiden_individual_res0.1dist1.5', 'leiden_individual_res0.1dist0.3', 'rank_genes_groups_leiden_individual_res0.1dist0.3', 'leiden_individual_res0.075dist1', 'rank_genes_groups_leiden_individual_res0.075dist1', 'leiden_individual_res0.075dist0.75', 'rank_genes_groups_leiden_individual_res0.075dist0.75', 'leiden_individual_res0.075dist0.5', 'rank_genes_groups_leiden_individual_res0.075dist0.5', 'leiden_individual_res0.075dist1.5', 'rank_genes_groups_leiden_individual_res0.075dist1.5', 'leiden_individual_res0.075dist0.3', 'rank_genes_groups_leiden_individual_res0.075dist0.3', 'leiden_individual_res0.15dist1', 'rank_genes_groups_leiden_individual_res0.15dist1', 'leiden_individual_res0.15dist0.75', 
'rank_genes_groups_leiden_individual_res0.15dist0.75', 'leiden_individual_res0.15dist0.5', 'rank_genes_groups_leiden_individual_res0.15dist0.5', 'leiden_individual_res0.15dist1.5', 'rank_genes_groups_leiden_individual_res0.15dist1.5', 'leiden_individual_res0.15dist0.3', 'rank_genes_groups_leiden_individual_res0.15dist0.3', 'annotation_by_markers_individual_new_colors'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
layers: 'counts', 'log1p', 'scaled'
obsp: 'distances', 'connectivities'
================================================================================
RRRM2_BRN_GC_ISS-T_YNG_GY9
================================================================================
AnnData object with n_obs × n_vars = 5465 × 20412
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1', 'annotation_by_markers_individual_res0.2dist1', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1', 'rank_genes_groups_leiden_individual_res0.2dist1'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
layers: 'counts', 'log1p', 'scaled'
obsp: 'distances', 'connectivities'
================================================================================
RRRM2_BRN_GC_ISS-T_OLD_GO18
================================================================================
AnnData object with n_obs × n_vars = 5842 × 18430
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1', 'annotation_by_markers_individual_res0.2dist1', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1', 'rank_genes_groups_leiden_individual_res0.2dist1'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
layers: 'counts', 'log1p', 'scaled'
obsp: 'distances', 'connectivities'
================================================================================
RRRM2_BRN_FLT_ISS-T_OLD_FO20
================================================================================
AnnData object with n_obs × n_vars = 4922 × 19440
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1', 'annotation_by_markers_individual_res0.2dist1', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1', 'rank_genes_groups_leiden_individual_res0.2dist1'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
layers: 'counts', 'log1p', 'scaled'
obsp: 'distances', 'connectivities'
================================================================================
RRRM2_BRN_GC_ISS-T_OLD_GO19
================================================================================
AnnData object with n_obs × n_vars = 7539 × 21334
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1', 'annotation_by_markers_individual_res0.2dist1', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual_res0.2dist0.75', 'annotation_by_markers_individual_res0.2dist0.75', 'leiden_individual_res0.2dist0.5', 'annotation_by_markers_individual_res0.2dist0.5', 'leiden_individual_res0.2dist1.5', 
'annotation_by_markers_individual_res0.2dist1.5', 'leiden_individual_res0.2dist0.3', 'annotation_by_markers_individual_res0.2dist0.3', 'leiden_individual_res0.5dist1', 'annotation_by_markers_individual_res0.5dist1', 'leiden_individual_res0.5dist0.75', 'annotation_by_markers_individual_res0.5dist0.75', 'leiden_individual_res0.5dist0.5', 'annotation_by_markers_individual_res0.5dist0.5', 'leiden_individual_res0.5dist1.5', 'annotation_by_markers_individual_res0.5dist1.5', 'leiden_individual_res0.5dist0.3', 'annotation_by_markers_individual_res0.5dist0.3', 'leiden_individual_res0.1dist1', 'annotation_by_markers_individual_res0.1dist1', 'leiden_individual_res0.1dist0.75', 'annotation_by_markers_individual_res0.1dist0.75', 'leiden_individual_res0.1dist0.5', 'annotation_by_markers_individual_res0.1dist0.5', 'leiden_individual_res0.1dist1.5', 'annotation_by_markers_individual_res0.1dist1.5', 'leiden_individual_res0.1dist0.3', 'annotation_by_markers_individual_res0.1dist0.3', 'leiden_individual_res0.075dist1', 'annotation_by_markers_individual_res0.075dist1', 'leiden_individual_res0.075dist0.75', 'annotation_by_markers_individual_res0.075dist0.75', 'leiden_individual_res0.075dist0.5', 'annotation_by_markers_individual_res0.075dist0.5', 'leiden_individual_res0.075dist1.5', 'annotation_by_markers_individual_res0.075dist1.5', 'leiden_individual_res0.075dist0.3', 'annotation_by_markers_individual_res0.075dist0.3', 'leiden_individual_res0.15dist1', 'annotation_by_markers_individual_res0.15dist1', 'leiden_individual_res0.15dist0.75', 'annotation_by_markers_individual_res0.15dist0.75', 'leiden_individual_res0.15dist0.5', 'annotation_by_markers_individual_res0.15dist0.5', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1', 'rank_genes_groups_leiden_individual_res0.2dist1', 'leiden_individual_res0.2dist0.75', 'rank_genes_groups_leiden_individual_res0.2dist0.75', 'leiden_individual_res0.2dist0.5', 'rank_genes_groups_leiden_individual_res0.2dist0.5', 'leiden_individual_res0.2dist1.5', 'rank_genes_groups_leiden_individual_res0.2dist1.5', 'leiden_individual_res0.2dist0.3', 'rank_genes_groups_leiden_individual_res0.2dist0.3', 'leiden_individual_res0.5dist1', 'rank_genes_groups_leiden_individual_res0.5dist1', 'leiden_individual_res0.5dist0.75', 'rank_genes_groups_leiden_individual_res0.5dist0.75', 'leiden_individual_res0.5dist0.5', 'rank_genes_groups_leiden_individual_res0.5dist0.5', 'leiden_individual_res0.5dist1.5', 'rank_genes_groups_leiden_individual_res0.5dist1.5', 'leiden_individual_res0.5dist0.3', 'rank_genes_groups_leiden_individual_res0.5dist0.3', 'leiden_individual_res0.1dist1', 'rank_genes_groups_leiden_individual_res0.1dist1', 'leiden_individual_res0.1dist0.75', 'rank_genes_groups_leiden_individual_res0.1dist0.75', 'leiden_individual_res0.1dist0.5', 'rank_genes_groups_leiden_individual_res0.1dist0.5', 'leiden_individual_res0.1dist1.5', 'rank_genes_groups_leiden_individual_res0.1dist1.5', 'leiden_individual_res0.1dist0.3', 'rank_genes_groups_leiden_individual_res0.1dist0.3', 'leiden_individual_res0.075dist1', 'rank_genes_groups_leiden_individual_res0.075dist1', 'leiden_individual_res0.075dist0.75', 'rank_genes_groups_leiden_individual_res0.075dist0.75', 'leiden_individual_res0.075dist0.5', 'rank_genes_groups_leiden_individual_res0.075dist0.5', 'leiden_individual_res0.075dist1.5', 'rank_genes_groups_leiden_individual_res0.075dist1.5', 'leiden_individual_res0.075dist0.3', 'rank_genes_groups_leiden_individual_res0.075dist0.3', 'leiden_individual_res0.15dist1', 'rank_genes_groups_leiden_individual_res0.15dist1', 'leiden_individual_res0.15dist0.75', 
'rank_genes_groups_leiden_individual_res0.15dist0.75', 'leiden_individual_res0.15dist0.5', 'rank_genes_groups_leiden_individual_res0.15dist0.5'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
layers: 'counts', 'log1p', 'scaled'
obsp: 'distances', 'connectivities'
================================================================================
RRRM2_BRN_GC_ISS-T_OLD_GO13
================================================================================
AnnData object with n_obs × n_vars = 3595 × 18614
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1', 'annotation_by_markers_individual_res0.2dist1', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1', 'rank_genes_groups_leiden_individual_res0.2dist1'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
layers: 'counts', 'log1p', 'scaled'
obsp: 'distances', 'connectivities'
================================================================================
RRRM2_BRN_FLT_ISS-T_YNG_FY8
================================================================================
AnnData object with n_obs × n_vars = 5095 × 20381
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1', 'annotation_by_markers_individual_res0.2dist1', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1', 'rank_genes_groups_leiden_individual_res0.2dist1'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
layers: 'counts', 'log1p', 'scaled'
obsp: 'distances', 'connectivities'
================================================================================
RRRM2_BRN_FLT_ISS-T_YNG_FY7
================================================================================
AnnData object with n_obs × n_vars = 3669 × 17139
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1', 'annotation_by_markers_individual_res0.2dist1', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1', 'rank_genes_groups_leiden_individual_res0.2dist1'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
layers: 'counts', 'log1p', 'scaled'
obsp: 'distances', 'connectivities'
================================================================================
RRRM2_BRN_FLT_ISS-T_OLD_FO19
================================================================================
AnnData object with n_obs × n_vars = 5747 × 18682
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1', 'annotation_by_markers_individual_res0.2dist1', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1', 'rank_genes_groups_leiden_individual_res0.2dist1'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
layers: 'counts', 'log1p', 'scaled'
obsp: 'distances', 'connectivities'
================================================================================
RRRM2_BRN_GC_ISS-T_YNG_GY7
================================================================================
AnnData object with n_obs × n_vars = 5265 × 20442
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1', 'annotation_by_markers_individual_res0.2dist1', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1', 'rank_genes_groups_leiden_individual_res0.2dist1'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
layers: 'counts', 'log1p', 'scaled'
obsp: 'distances', 'connectivities'
================================================================================
RRRM2_BRN_FLT_ISS-T_OLD_FO14
================================================================================
AnnData object with n_obs × n_vars = 3864 × 20113
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1', 'annotation_by_markers_individual_res0.2dist1', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1', 'rank_genes_groups_leiden_individual_res0.2dist1'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
layers: 'counts', 'log1p', 'scaled'
obsp: 'distances', 'connectivities'
================================================================================
RRRM2_BRN_GC_ISS-T_YNG_GY1
================================================================================
AnnData object with n_obs × n_vars = 3672 × 20657
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1', 'annotation_by_markers_individual_res0.2dist1', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1', 'rank_genes_groups_leiden_individual_res0.2dist1'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
layers: 'counts', 'log1p', 'scaled'
obsp: 'distances', 'connectivities'
================================================================================
RRRM2_BRN_FLT_ISS-T_YNG_FY2
================================================================================
AnnData object with n_obs × n_vars = 3730 × 20102
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1', 'annotation_by_markers_individual_res0.2dist1', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1', 'rank_genes_groups_leiden_individual_res0.2dist1'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
layers: 'counts', 'log1p', 'scaled'
obsp: 'distances', 'connectivities'
================================================================================
RRRM2_BRN_FLT_ISS-T_OLD_FO17
================================================================================
AnnData object with n_obs × n_vars = 1321 × 12302
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1', 'annotation_by_markers_individual_res0.2dist1', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1', 'rank_genes_groups_leiden_individual_res0.2dist1'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
layers: 'counts', 'log1p', 'scaled'
obsp: 'distances', 'connectivities'
================================================================================
RRRM2_BRN_GC_ISS-T_OLD_GO16
================================================================================
AnnData object with n_obs × n_vars = 18386 × 19865
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1', 'annotation_by_markers_individual_res0.2dist1', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual_res0.2dist0.75', 'annotation_by_markers_individual_res0.2dist0.75', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1', 'rank_genes_groups_leiden_individual_res0.2dist1', 'leiden_individual_res0.2dist0.75', 'rank_genes_groups_leiden_individual_res0.2dist0.75'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
layers: 'counts', 'log1p', 'scaled'
obsp: 'distances', 'connectivities'
================================================================================
RRRM2_BRN_FLT_ISS-T_OLD_FO16
================================================================================
AnnData object with n_obs × n_vars = 4050 × 20391
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1', 'annotation_by_markers_individual_res0.2dist1', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1', 'rank_genes_groups_leiden_individual_res0.2dist1'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
layers: 'counts', 'log1p', 'scaled'
obsp: 'distances', 'connectivities'
================================================================================
RRRM2_BRN_GC_ISS-T_YNG_GY2
================================================================================
AnnData object with n_obs × n_vars = 9651 × 22116
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1', 'annotation_by_markers_individual_res0.2dist1', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1', 'rank_genes_groups_leiden_individual_res0.2dist1'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
layers: 'counts', 'log1p', 'scaled'
obsp: 'distances', 'connectivities'
================================================================================
RRRM2_BRN_FLT_ISS-T_YNG_FY5
================================================================================
AnnData object with n_obs × n_vars = 3914 × 20509
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1', 'annotation_by_markers_individual_res0.2dist1', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual'
var: 'gene_ids', 'feature_types', 'genome', 'interval', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts', 'n_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'scrublet', 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden_individual_res0.2dist1', 'rank_genes_groups_leiden_individual_res0.2dist1'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
layers: 'counts', 'log1p', 'scaled'
obsp: 'distances', 'connectivities'
AAACAGCCAATCGCAC-1 {'resolution': 0.1, 'min_dist': 0.3}
AAACAGCCAGCACCAT-1 {'resolution': 0.1, 'min_dist': 0.3}
AAACAGCCAGGTTCAC-1 {'resolution': 0.1, 'min_dist': 0.3}
AAACAGCCATCAATCG-1 {'resolution': 0.1, 'min_dist': 0.3}
AAACAGCCATGTCAAT-1 {'resolution': 0.1, 'min_dist': 0.3}
...
TTTGTTGGTCGTTATC-1 {'resolution': 0.1, 'min_dist': 0.3}
TTTGTTGGTGCAATAT-1 {'resolution': 0.1, 'min_dist': 0.3}
TTTGTTGGTGCAATGC-1 {'resolution': 0.1, 'min_dist': 0.3}
TTTGTTGGTGGTTAGC-1 {'resolution': 0.1, 'min_dist': 0.3}
TTTGTTGGTTGCACAA-1 {'resolution': 0.1, 'min_dist': 0.3}
Name: kws_cluster_individual, Length: 7547, dtype: category
Categories (1, object): ['{'resolution': 0.1, 'min_dist': 0.3}']
AAACAGCCACATACTG-1 {'resolution': 0.2, 'min_dist': 1}
AAACAGCCAGCTACGT-1 {'resolution': 0.2, 'min_dist': 1}
AAACAGCCAGGTTCAC-1 {'resolution': 0.2, 'min_dist': 1}
AAACAGCCATAATCGT-1 {'resolution': 0.2, 'min_dist': 1}
AAACAGCCATAGACTT-1 {'resolution': 0.2, 'min_dist': 1}
...
TTTGTTGGTAACGAGG-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTAGGATTT-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTGCTGGTG-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTGCTTACT-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTTCGGGAT-1 {'resolution': 0.2, 'min_dist': 1}
Name: kws_cluster_individual, Length: 5465, dtype: category
Categories (1, object): ['{'resolution': 0.2, 'min_dist': 1}']
AAACAGCCACCATATG-1 {'resolution': 0.2, 'min_dist': 1}
AAACAGCCACTAGCGT-1 {'resolution': 0.2, 'min_dist': 1}
AAACAGCCAGTAATAG-1 {'resolution': 0.2, 'min_dist': 1}
AAACAGCCATCGCTTT-1 {'resolution': 0.2, 'min_dist': 1}
AAACAGCCATTGTTGG-1 {'resolution': 0.2, 'min_dist': 1}
...
TTTGTGGCAGAGGGAG-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGGCATAATGTC-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGGCATTAAAGG-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTCAGTAAT-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTGAACAAA-1 {'resolution': 0.2, 'min_dist': 1}
Name: kws_cluster_individual, Length: 5842, dtype: category
Categories (1, object): ['{'resolution': 0.2, 'min_dist': 1}']
AAACAGCCACAAAGGT-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCAAACTGTT-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCAAAGGCCA-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCAATATGGA-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCAATCCCTT-1 {'resolution': 0.2, 'min_dist': 1}
...
TTTGTTGGTCAGGCCA-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTCATCCTG-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTGCAACTA-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTTACTTGC-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTTGTGATG-1 {'resolution': 0.2, 'min_dist': 1}
Name: kws_cluster_individual, Length: 4922, dtype: category
Categories (1, object): ['{'resolution': 0.2, 'min_dist': 1}']
AAACAGCCACATAACT-1 {'resolution': 0.15, 'min_dist': 0.5}
AAACAGCCACCGGCTA-1 {'resolution': 0.15, 'min_dist': 0.5}
AAACAGCCACGTAATT-1 {'resolution': 0.15, 'min_dist': 0.5}
AAACAGCCAGGAACAT-1 {'resolution': 0.15, 'min_dist': 0.5}
AAACATGCAAAGGCCA-1 {'resolution': 0.15, 'min_dist': 0.5}
...
TTTGTGTTCTTTAGGA-1 {'resolution': 0.15, 'min_dist': 0.5}
TTTGTTGGTAACAGGG-1 {'resolution': 0.15, 'min_dist': 0.5}
TTTGTTGGTATTACCC-1 {'resolution': 0.15, 'min_dist': 0.5}
TTTGTTGGTCACCAAA-1 {'resolution': 0.15, 'min_dist': 0.5}
TTTGTTGGTTGCACGG-1 {'resolution': 0.15, 'min_dist': 0.5}
Name: kws_cluster_individual, Length: 7539, dtype: category
Categories (1, object): ['{'resolution': 0.15, 'min_dist': 0.5}']
AAACAGCCACAACAGG-1 {'resolution': 0.2, 'min_dist': 1}
AAACAGCCACATTAAC-1 {'resolution': 0.2, 'min_dist': 1}
AAACAGCCAGTCTAAT-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCAGCACGTT-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCAGCTTAAT-1 {'resolution': 0.2, 'min_dist': 1}
...
TTTGTGGCAATAAGCA-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGGCAGCACGTT-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGTTCACGCGGT-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGTTCCTAACGG-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGTTCTAATTGG-1 {'resolution': 0.2, 'min_dist': 1}
Name: kws_cluster_individual, Length: 3595, dtype: category
Categories (1, object): ['{'resolution': 0.2, 'min_dist': 1}']
AAACAGCCAAACCTTG-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCAATTGAAG-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCACCTGCTC-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCAGGACACA-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCATTATGGT-1 {'resolution': 0.2, 'min_dist': 1}
...
TTTGTGTTCGAAGTGA-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTCATTGAG-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTGGTTCTT-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTGTTTGCT-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTTGCTGGG-1 {'resolution': 0.2, 'min_dist': 1}
Name: kws_cluster_individual, Length: 5095, dtype: category
Categories (1, object): ['{'resolution': 0.2, 'min_dist': 1}']
AAACAGCCAAATACCT-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCACCCACAG-1 {'resolution': 0.2, 'min_dist': 1}
AAACCAACAGCTTAGC-1 {'resolution': 0.2, 'min_dist': 1}
AAACCGAAGTGTTGCG-1 {'resolution': 0.2, 'min_dist': 1}
AAACCGCGTACTGAAT-1 {'resolution': 0.2, 'min_dist': 1}
...
TTTGTGTTCATAATCG-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGTTCATGACCG-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTATTGTGG-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTCTAACAG-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTGGACCTG-1 {'resolution': 0.2, 'min_dist': 1}
Name: kws_cluster_individual, Length: 3669, dtype: category
Categories (1, object): ['{'resolution': 0.2, 'min_dist': 1}']
AAACAGCCAAGTCGCT-1 {'resolution': 0.2, 'min_dist': 1}
AAACAGCCAATGAAGC-1 {'resolution': 0.2, 'min_dist': 1}
AAACAGCCAGGCCAAA-1 {'resolution': 0.2, 'min_dist': 1}
AAACAGCCATTAGCGC-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCAACCCTCC-1 {'resolution': 0.2, 'min_dist': 1}
...
TTTGTGTTCGATTATG-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGTTCGCAAACT-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGTTCTTAAGTG-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTCCCGGAA-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTTTATGGG-1 {'resolution': 0.2, 'min_dist': 1}
Name: kws_cluster_individual, Length: 5747, dtype: category
Categories (1, object): ['{'resolution': 0.2, 'min_dist': 1}']
AAACAGCCAATCCCTT-1 {'resolution': 0.2, 'min_dist': 1}
AAACAGCCACGGTACT-1 {'resolution': 0.2, 'min_dist': 1}
AAACAGCCAGTCTATG-1 {'resolution': 0.2, 'min_dist': 1}
AAACAGCCATAATCAC-1 {'resolution': 0.2, 'min_dist': 1}
AAACAGCCATAGTCAT-1 {'resolution': 0.2, 'min_dist': 1}
...
TTTGTGTTCCATCAGG-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGTTCCGCCTAT-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGTTCGCTAAAC-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGTTCTAAGGTC-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTTACTTGC-1 {'resolution': 0.2, 'min_dist': 1}
Name: kws_cluster_individual, Length: 5265, dtype: category
Categories (1, object): ['{'resolution': 0.2, 'min_dist': 1}']
AAACAGCCAAAGCGCA-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCAATTAGGA-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCAGGGAGGA-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCATGTTGCA-1 {'resolution': 0.2, 'min_dist': 1}
AAACCAACACAACCTA-1 {'resolution': 0.2, 'min_dist': 1}
...
TTTGTGGCATGTCAGC-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGGCATTATGAC-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGTTCGCACACA-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGTTCGTTAGCG-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTTGAGGTC-1 {'resolution': 0.2, 'min_dist': 1}
Name: kws_cluster_individual, Length: 3864, dtype: category
Categories (1, object): ['{'resolution': 0.2, 'min_dist': 1}']
AAACAGCCAAATACCT-1 {'resolution': 0.2, 'min_dist': 1}
AAACAGCCAGAACCGA-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCAATCCTGA-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCAGGTTACC-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCATGCTTAG-1 {'resolution': 0.2, 'min_dist': 1}
...
TTTGTGTTCCCATAGG-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGTTCGCTAAAC-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGTTCGTTATAG-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTGAAACAA-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTTACGCAA-1 {'resolution': 0.2, 'min_dist': 1}
Name: kws_cluster_individual, Length: 3672, dtype: category
Categories (1, object): ['{'resolution': 0.2, 'min_dist': 1}']
AAACAGCCAATTTGGT-1 {'resolution': 0.2, 'min_dist': 1}
AAACAGCCAGTTTACG-1 {'resolution': 0.2, 'min_dist': 1}
AAACAGCCATCCCTCA-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCACCTGTAA-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCAGCAAGAT-1 {'resolution': 0.2, 'min_dist': 1}
...
TTTGTGGCAATGAAGC-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGGCACTAGCGT-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGTTCATGCTCC-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGTTCCAAATCA-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTCGCAAAC-1 {'resolution': 0.2, 'min_dist': 1}
Name: kws_cluster_individual, Length: 3730, dtype: category
Categories (1, object): ['{'resolution': 0.2, 'min_dist': 1}']
AAACATGCAAGCCAGA-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCAATGCCTA-1 {'resolution': 0.2, 'min_dist': 1}
AAACCAACACCGGTAT-1 {'resolution': 0.2, 'min_dist': 1}
AAACCGGCAAGATTCT-1 {'resolution': 0.2, 'min_dist': 1}
AAACCGGCAGGATAAC-1 {'resolution': 0.2, 'min_dist': 1}
...
TTTGTGTTCGCAACAT-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGTTCGGTTAGT-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTAGGTGTC-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTGCTCCGT-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTTTGCTGT-1 {'resolution': 0.2, 'min_dist': 1}
Name: kws_cluster_individual, Length: 1321, dtype: category
Categories (1, object): ['{'resolution': 0.2, 'min_dist': 1}']
AAACAGCCAAAGCGGC-1 {'resolution': 0.2, 'min_dist': 0.75}
AAACAGCCAACAGCCT-1 {'resolution': 0.2, 'min_dist': 0.75}
AAACAGCCAAGTAAGC-1 {'resolution': 0.2, 'min_dist': 0.75}
AAACAGCCAATAACGA-1 {'resolution': 0.2, 'min_dist': 0.75}
AAACAGCCACATACTG-1 {'resolution': 0.2, 'min_dist': 0.75}
...
TTTGTTGGTGGTGAGA-1 {'resolution': 0.2, 'min_dist': 0.75}
TTTGTTGGTTACATCC-1 {'resolution': 0.2, 'min_dist': 0.75}
TTTGTTGGTTGCAGTA-1 {'resolution': 0.2, 'min_dist': 0.75}
TTTGTTGGTTGGCGTG-1 {'resolution': 0.2, 'min_dist': 0.75}
TTTGTTGGTTTACCGT-1 {'resolution': 0.2, 'min_dist': 0.75}
Name: kws_cluster_individual, Length: 18386, dtype: category
Categories (1, object): ['{'resolution': 0.2, 'min_dist': 0.75}']
AAACAGCCACCCACAG-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCAATAGCAA-1 {'resolution': 0.2, 'min_dist': 1}
AAACCAACAGGAAGCC-1 {'resolution': 0.2, 'min_dist': 1}
AAACCAACAGGTCCTG-1 {'resolution': 0.2, 'min_dist': 1}
AAACCAACATGTTGTG-1 {'resolution': 0.2, 'min_dist': 1}
...
TTTGTGTTCATGGTTA-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGTTCTACCTGC-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTATTACCC-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTGCTGGTG-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTGGAAACG-1 {'resolution': 0.2, 'min_dist': 1}
Name: kws_cluster_individual, Length: 4050, dtype: category
Categories (1, object): ['{'resolution': 0.2, 'min_dist': 1}']
AAACAGCCACAAAGCG-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCAACCTGGT-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCACCAACCG-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCATAATGTC-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCATCCCGCT-1 {'resolution': 0.2, 'min_dist': 1}
...
TTTGTTGGTTAGGCGT-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTTAGTTGG-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTTATTGCC-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTTCACCCA-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTTTACCGT-1 {'resolution': 0.2, 'min_dist': 1}
Name: kws_cluster_individual, Length: 9651, dtype: category
Categories (1, object): ['{'resolution': 0.2, 'min_dist': 1}']
AAACAGCCAAGGTGGC-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCATAGCGGA-1 {'resolution': 0.2, 'min_dist': 1}
AAACATGCATCATGTG-1 {'resolution': 0.2, 'min_dist': 1}
AAACCAACACAAGCCT-1 {'resolution': 0.2, 'min_dist': 1}
AAACCAACAGGAAGCC-1 {'resolution': 0.2, 'min_dist': 1}
...
TTTGTGTTCCCTGATC-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGTTCCGTAAAC-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTGTTCTGTGCAG-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTGTTAGCA-1 {'resolution': 0.2, 'min_dist': 1}
TTTGTTGGTTAAGTGT-1 {'resolution': 0.2, 'min_dist': 1}
Name: kws_cluster_individual, Length: 3914, dtype: category
Categories (1, object): ['{'resolution': 0.2, 'min_dist': 1}']
Concatenate & Integrate¶
If you are concerned about disk space, you may want to delete the files that `file_concat` and `files_individual` point to after this cell has finished running.
%%time
# ---------------------------------------------------------------------------
# Integration options
# ---------------------------------------------------------------------------
# Individual annotations are treated as available only when neither the
# clustering keywords nor the predefined marker set is None.
cct_available = (kws_cluster is not None) and (markers_predefined is not None)

# Arguments for scVI/scANVI
kws_vi = {
    "early_stopping": True,
    "batch_size": 1024,  # raise/lower if more/less than 16 GB VRAM
    "max_epochs": 100,
    "accelerator": "gpu",
    # "categorical_covariate_keys": covariates_categorical,
    "n_latent": 40,
    "n_hidden": 400,
}

# NOTE: key order matters here because the dictionary is stringified and
# stored in `.obs` below; keep the scVI/scANVI arguments unpacked last.
kws_integrate = {
    "col_celltype": cci_scanvi if cct_available else None,
    "flavor": "scanvi",  # other options: "scvi", "scanorama", "harmony"
    "kws_pp": None,
    "kws_cluster": None,
    "n_top_genes": None,  # alternative previously tried: 5000
    "join": join_method,
    "merge": "same",
    "col_batch": None,  # suppress using batch as covariate
    "drop_non_hvgs": False,
    "use_rapids": True,
    "fill_value": np.nan if join_method == "outer" else None,
    "out_file": file_concat,
    **kws_vi,
}

# With an scVI/scANVI flavor and individual annotations available, also pass
# along the label used for unlabeled cells.
if cct_available and kws_integrate["flavor"] in ("scvi", "scanvi"):
    kws_integrate["unlabeled_category"] = unlabeled_cat

# ---------------------------------------------------------------------------
# Integrate, then record the integration parameters on every cell
# ---------------------------------------------------------------------------
self = scflow.Rna(
    files_individual,
    col_sample=col_sample,
    col_batch=col_batch,
    kws_integrate=kws_integrate,
)
self.rna.obs = self.rna.obs.assign(kws_integrate=str(kws_integrate))

# Write the processed/integrated object unless the file already exists and
# overwriting was not requested.
if overwrite is True or not os.path.exists(file_new):
    self.rna.write_h5ad(file_new)

# Display the object summary, then the per-cell metadata as the cell output
print(self.rna)
self.rna.obs
>>>Concatenating data... >>>Finding HVGs for overall data...
GPU available: True (cuda), used: True TPU available: False, using: 0 TPU cores HPU available: False, using: 0 HPUs LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
>>>Integrating with respect to sample (SCANVI)...
***Using counts layer for scanvi...
>>>Integrating with respect to sample (SCANVI)...
***Setting up scVI model: {'n_latent': 40, 'n_hidden': 400}...
***Traning scVI: {'max_epochs': 100, 'accelerator': 'gpu', 'batch_size': 1024, 'early_stopping': True}...
Training: 0%| | 0/100 [00:00<?, ?it/s]
`Trainer.fit` stopped: `max_epochs=100` reached.
***Setting up scANVI model: {'n_latent': 40, 'n_hidden': 400}...
***Traning scANVI: {'max_epochs': 100, 'accelerator': 'gpu', 'batch_size': 1024, 'early_stopping': True}...
INFO Training for 100 epochs.
GPU available: True (cuda), used: True TPU available: False, using: 0 TPU cores HPU available: False, using: 0 HPUs LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Training: 0%| | 0/100 [00:00<?, ?it/s]
`Trainer.fit` stopped: `max_epochs=100` reached.
count 12209.000000 mean 18390.727742 std 15953.886715 min 553.000000 25% 6592.000000 50% 12862.000000 75% 25092.000000 max 103242.000000 Name: n_cells_by_counts, dtype: float64
AnnData object with n_obs × n_vars = 103274 × 12209
obs: 'Group', 'sample', 'Characteristics[Organism]', 'Term Source REF', 'Term Accession Number', 'Characteristics[Strain]', 'Term Source REF.1', 'Term Accession Number.1', 'Characteristics[Genotype]', 'Term Source REF.2', 'Term Accession Number.2', 'Characteristics[Animal Source]', 'Characteristics[Sex]', 'Term Source REF.3', 'Term Accession Number.3', 'Factor Value[Spaceflight]', 'Term Source REF.4', 'Term Accession Number.4', 'Factor Value[Age]', 'Unit', 'Term Source REF.5', 'Term Accession Number.5', 'Characteristics[Material Type]', 'Term Source REF.6', 'Term Accession Number.6', 'Characteristics[diet]', 'Characteristics[Feeding Schedule]', 'Characteristics[Age at Euthanasia]', 'Unit.1', 'Term Source REF.7', 'Term Accession Number.7', 'Protocol REF', 'Parameter Value[habitat]', 'Parameter Value[Enrichment material]', 'Parameter Value[duration]', 'Unit.2', 'Term Source REF.8', 'Term Accession Number.8', 'Parameter Value[light cycle]', 'Protocol REF.1', 'Parameter Value[Euthanasia Method]', 'Parameter Value[Sample Preservation Method]', 'Term Source REF.9', 'Term Accession Number.9', 'Parameter Value[Sample Storage Temperature]', 'Unit.3', 'Term Source REF.10', 'Term Accession Number.10', 'Comment[RFID]', 'Comment[Euthanasia Date]', 'n_cells_original_sample', 'kws_pp_sample', 'n_genes_by_counts', 'total_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'total_counts_mt', 'pct_counts_mt', 'log1p_total_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'log1p_total_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'log1p_total_counts_hb', 'n_counts', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden_individual_res0.2dist1', 'annotation_by_markers_individual_res0.2dist1', 'annotation_by_markers_individual_heterogeneous_collapsed', 'leiden_individual', 'annotation_by_markers_individual', 'kws_cluster_individual', '_scvi_batch', '_scvi_labels', 'annotation_scanvi', 'kws_integrate'
var: 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'total_counts', 'mean_counts', 'pct_dropout_by_counts', 'log1p_total_counts', 'log1p_mean_counts'
uns: '_scvi_uuid', '_scvi_manager_uuid'
obsm: 'X_pca', 'X_umap', 'X_scANVI', 'X_scVI', 'X_pca_old'
layers: 'counts', 'log1p', 'scaled'
CPU times: user 13min 23s, sys: 1min 58s, total: 15min 21s
Wall time: 13min 46s
| Group | sample | Characteristics[Organism] | Term Source REF | Term Accession Number | Characteristics[Strain] | Term Source REF.1 | Term Accession Number.1 | Characteristics[Genotype] | Term Source REF.2 | Term Accession Number.2 | Characteristics[Animal Source] | Characteristics[Sex] | Term Source REF.3 | Term Accession Number.3 | Factor Value[Spaceflight] | Term Source REF.4 | Term Accession Number.4 | Factor Value[Age] | Unit | Term Source REF.5 | Term Accession Number.5 | Characteristics[Material Type] | Term Source REF.6 | Term Accession Number.6 | Characteristics[diet] | Characteristics[Feeding Schedule] | Characteristics[Age at Euthanasia] | Unit.1 | Term Source REF.7 | Term Accession Number.7 | Protocol REF | Parameter Value[habitat] | Parameter Value[Enrichment material] | Parameter Value[duration] | Unit.2 | Term Source REF.8 | Term Accession Number.8 | Parameter Value[light cycle] | Protocol REF.1 | Parameter Value[Euthanasia Method] | Parameter Value[Sample Preservation Method] | Term Source REF.9 | Term Accession Number.9 | Parameter Value[Sample Storage Temperature] | Unit.3 | Term Source REF.10 | Term Accession Number.10 | Comment[RFID] | Comment[Euthanasia Date] | n_cells_original_sample | kws_pp_sample | n_genes_by_counts | total_counts | log1p_n_genes_by_counts | log1p_total_counts | total_counts_mt | pct_counts_mt | log1p_total_counts_mt | total_counts_ribo | pct_counts_ribo | log1p_total_counts_ribo | total_counts_hb | pct_counts_hb | log1p_total_counts_hb | n_counts | n_genes | doublet_score | predicted_doublet | leiden_individual_res0.2dist1 | annotation_by_markers_individual_res0.2dist1 | annotation_by_markers_individual_heterogeneous_collapsed | leiden_individual | annotation_by_markers_individual | kws_cluster_individual | _scvi_batch | _scvi_labels | annotation_scanvi | kws_integrate | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| AAACAGCCAATCGCAC-1_RRRM2_BRN_GC_ISS-T_YNG_GY4 | Ground Control | 20 Weeks | RRRM2_BRN_GC_ISS-T_YNG_GY4 | Mus musculus | NCBITAXON | http://purl.bioontology.org/ontology/NCBITAXON... | C57BL/6NTac | EFO | http://www.ebi.ac.uk/efo/EFO_0020093 | Wild Type | NCIT | http://purl.obolibrary.org/obo/NCIT_C62195 | Taconic Biosciences | Female | MESH | http://purl.bioontology.org/ontology/MESH/D005260 | Ground Control | OSD | https://osdr.nasa.gov/ | 12 | week | UO | http://purl.obolibrary.org/obo/UO_0000034 | Left cerebral hemisphere | FMA | http://purl.org/sig/ont/fma/fma61819 | Nutrient Upgraded Rodent Food Bar (NuRFB) | ad libitum | 20 | week | UO | http://purl.obolibrary.org/obo/UO_0000034 | Animal Husbandry | Rodent Flight Hardware (Transporter and Habitat) | Hut | 53 | day | UO | http://purl.obolibrary.org/obo/UO_0000033 | 12 h light/dark cycle | sample collection | Bilateral thoracotomy with sedation, Cardiac p... | Liquid Nitrogen | NCIT | http://purl.obolibrary.org/obo/NCIT_C68796 | -80 | degree Celsius | UO | http://purl.obolibrary.org/obo/UO_0000027 | 6E2A671967 | 18-Sep-2019 | 8542 | {'min_max_genes': [321.0, None], 'min_max_cell... | 2074 | 11854.281250 | 7.637716 | 9.380528 | 35.685387 | 0.301034 | 3.602379 | 48.519485 | 0.409299 | 3.902366 | 6.004995 | 0.050657 | 1.946623 | 4962.0 | 2152 | 0.030852 | False | 3 | Neuron | Neuron | 3 | Neuron | {'resolution': 0.1, 'min_dist': 0.3} | 0 | 0 | Neuron | {'col_celltype': 'annotation_by_markers_indivi... |
| AAACAGCCAGCACCAT-1_RRRM2_BRN_GC_ISS-T_YNG_GY4 | Ground Control | 20 Weeks | RRRM2_BRN_GC_ISS-T_YNG_GY4 | Mus musculus | NCBITAXON | http://purl.bioontology.org/ontology/NCBITAXON... | C57BL/6NTac | EFO | http://www.ebi.ac.uk/efo/EFO_0020093 | Wild Type | NCIT | http://purl.obolibrary.org/obo/NCIT_C62195 | Taconic Biosciences | Female | MESH | http://purl.bioontology.org/ontology/MESH/D005260 | Ground Control | OSD | https://osdr.nasa.gov/ | 12 | week | UO | http://purl.obolibrary.org/obo/UO_0000034 | Left cerebral hemisphere | FMA | http://purl.org/sig/ont/fma/fma61819 | Nutrient Upgraded Rodent Food Bar (NuRFB) | ad libitum | 20 | week | UO | http://purl.obolibrary.org/obo/UO_0000034 | Animal Husbandry | Rodent Flight Hardware (Transporter and Habitat) | Hut | 53 | day | UO | http://purl.obolibrary.org/obo/UO_0000033 | 12 h light/dark cycle | sample collection | Bilateral thoracotomy with sedation, Cardiac p... | Liquid Nitrogen | NCIT | http://purl.obolibrary.org/obo/NCIT_C68796 | -80 | degree Celsius | UO | http://purl.obolibrary.org/obo/UO_0000027 | 6E2A671967 | 18-Sep-2019 | 8542 | {'min_max_genes': [321.0, None], 'min_max_cell... | 1568 | 9592.913086 | 7.358194 | 9.168884 | 36.192043 | 0.377279 | 3.616095 | 53.094116 | 0.553472 | 3.990726 | 5.822495 | 0.060696 | 1.920225 | 2994.0 | 1636 | 0.052691 | False | 13 | Neuron | Neuron | 12 | Neuron | {'resolution': 0.1, 'min_dist': 0.3} | 0 | 0 | Neuron | {'col_celltype': 'annotation_by_markers_indivi... |
| AAACAGCCAGGTTCAC-1_RRRM2_BRN_GC_ISS-T_YNG_GY4 | Ground Control | 20 Weeks | RRRM2_BRN_GC_ISS-T_YNG_GY4 | Mus musculus | NCBITAXON | http://purl.bioontology.org/ontology/NCBITAXON... | C57BL/6NTac | EFO | http://www.ebi.ac.uk/efo/EFO_0020093 | Wild Type | NCIT | http://purl.obolibrary.org/obo/NCIT_C62195 | Taconic Biosciences | Female | MESH | http://purl.bioontology.org/ontology/MESH/D005260 | Ground Control | OSD | https://osdr.nasa.gov/ | 12 | week | UO | http://purl.obolibrary.org/obo/UO_0000034 | Left cerebral hemisphere | FMA | http://purl.org/sig/ont/fma/fma61819 | Nutrient Upgraded Rodent Food Bar (NuRFB) | ad libitum | 20 | week | UO | http://purl.obolibrary.org/obo/UO_0000034 | Animal Husbandry | Rodent Flight Hardware (Transporter and Habitat) | Hut | 53 | day | UO | http://purl.obolibrary.org/obo/UO_0000033 | 12 h light/dark cycle | sample collection | Bilateral thoracotomy with sedation, Cardiac p... | Liquid Nitrogen | NCIT | http://purl.obolibrary.org/obo/NCIT_C68796 | -80 | degree Celsius | UO | http://purl.obolibrary.org/obo/UO_0000027 | 6E2A671967 | 18-Sep-2019 | 8542 | {'min_max_genes': [321.0, None], 'min_max_cell... | 2953 | 15581.877930 | 7.990915 | 9.653928 | 9.529344 | 0.061157 | 2.354166 | 45.639675 | 0.292902 | 3.842452 | 0.000000 | 0.000000 | 0.000000 | 8625.0 | 3080 | 0.014899 | False | 1 | Neuron | Neuron | 2 | Neuron | {'resolution': 0.1, 'min_dist': 0.3} | 0 | 0 | Neuron | {'col_celltype': 'annotation_by_markers_indivi... |
| AAACAGCCATCAATCG-1_RRRM2_BRN_GC_ISS-T_YNG_GY4 | Ground Control | 20 Weeks | RRRM2_BRN_GC_ISS-T_YNG_GY4 | Mus musculus | NCBITAXON | http://purl.bioontology.org/ontology/NCBITAXON... | C57BL/6NTac | EFO | http://www.ebi.ac.uk/efo/EFO_0020093 | Wild Type | NCIT | http://purl.obolibrary.org/obo/NCIT_C62195 | Taconic Biosciences | Female | MESH | http://purl.bioontology.org/ontology/MESH/D005260 | Ground Control | OSD | https://osdr.nasa.gov/ | 12 | week | UO | http://purl.obolibrary.org/obo/UO_0000034 | Left cerebral hemisphere | FMA | http://purl.org/sig/ont/fma/fma61819 | Nutrient Upgraded Rodent Food Bar (NuRFB) | ad libitum | 20 | week | UO | http://purl.obolibrary.org/obo/UO_0000034 | Animal Husbandry | Rodent Flight Hardware (Transporter and Habitat) | Hut | 53 | day | UO | http://purl.obolibrary.org/obo/UO_0000033 | 12 h light/dark cycle | sample collection | Bilateral thoracotomy with sedation, Cardiac p... | Liquid Nitrogen | NCIT | http://purl.obolibrary.org/obo/NCIT_C68796 | -80 | degree Celsius | UO | http://purl.obolibrary.org/obo/UO_0000027 | 6E2A671967 | 18-Sep-2019 | 8542 | {'min_max_genes': [321.0, None], 'min_max_cell... | 1071 | 6977.024902 | 6.977281 | 8.850521 | 14.213669 | 0.203721 | 2.722194 | 31.060627 | 0.445184 | 3.467629 | 0.000000 | 0.000000 | 0.000000 | 2017.0 | 1116 | 0.018405 | False | 8 | Oligodendrocyte | Oligodendrocyte | 6 | Oligodendrocyte | {'resolution': 0.1, 'min_dist': 0.3} | 0 | 0 | Oligodendrocyte | {'col_celltype': 'annotation_by_markers_indivi... |
| AAACAGCCATGTCAAT-1_RRRM2_BRN_GC_ISS-T_YNG_GY4 | Ground Control | 20 Weeks | RRRM2_BRN_GC_ISS-T_YNG_GY4 | Mus musculus | NCBITAXON | http://purl.bioontology.org/ontology/NCBITAXON... | C57BL/6NTac | EFO | http://www.ebi.ac.uk/efo/EFO_0020093 | Wild Type | NCIT | http://purl.obolibrary.org/obo/NCIT_C62195 | Taconic Biosciences | Female | MESH | http://purl.bioontology.org/ontology/MESH/D005260 | Ground Control | OSD | https://osdr.nasa.gov/ | 12 | week | UO | http://purl.obolibrary.org/obo/UO_0000034 | Left cerebral hemisphere | FMA | http://purl.org/sig/ont/fma/fma61819 | Nutrient Upgraded Rodent Food Bar (NuRFB) | ad libitum | 20 | week | UO | http://purl.obolibrary.org/obo/UO_0000034 | Animal Husbandry | Rodent Flight Hardware (Transporter and Habitat) | Hut | 53 | day | UO | http://purl.obolibrary.org/obo/UO_0000033 | 12 h light/dark cycle | sample collection | Bilateral thoracotomy with sedation, Cardiac p... | Liquid Nitrogen | NCIT | http://purl.obolibrary.org/obo/NCIT_C68796 | -80 | degree Celsius | UO | http://purl.obolibrary.org/obo/UO_0000027 | 6E2A671967 | 18-Sep-2019 | 8542 | {'min_max_genes': [321.0, None], 'min_max_cell... | 2527 | 13878.588867 | 7.835184 | 9.538175 | 24.442604 | 0.176117 | 3.236425 | 46.229156 | 0.333097 | 3.855011 | 0.000000 | 0.000000 | 0.000000 | 6688.0 | 2614 | 0.004741 | False | 1 | Neuron | Neuron | 2 | Neuron | {'resolution': 0.1, 'min_dist': 0.3} | 0 | 0 | Neuron | {'col_celltype': 'annotation_by_markers_indivi... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| TTTGTGTTCCCTGATC-1_RRRM2_BRN_FLT_ISS-T_YNG_FY5 | Space Flight | 20 Weeks | RRRM2_BRN_FLT_ISS-T_YNG_FY5 | Mus musculus | NCBITAXON | http://purl.bioontology.org/ontology/NCBITAXON... | C57BL/6NTac | EFO | http://www.ebi.ac.uk/efo/EFO_0020093 | Wild Type | NCIT | http://purl.obolibrary.org/obo/NCIT_C62195 | Taconic Biosciences | Female | MESH | http://purl.bioontology.org/ontology/MESH/D005260 | Space Flight | MESH | http://purl.bioontology.org/ontology/MESH/D013026 | 12 | week | UO | http://purl.obolibrary.org/obo/UO_0000034 | Left cerebral hemisphere | FMA | http://purl.org/sig/ont/fma/fma61819 | Nutrient Upgraded Rodent Food Bar (NuRFB) | ad libitum | 20 | week | UO | http://purl.obolibrary.org/obo/UO_0000034 | Animal Husbandry | Rodent Flight Hardware (Transporter and Habitat) | Hut | 53 | day | UO | http://purl.obolibrary.org/obo/UO_0000033 | 12 h light/dark cycle | sample collection | Bilateral thoracotomy with sedation, Cardiac p... | Cryochiller | OSD | https://osdr.nasa.gov/ | -80 | degree Celsius | UO | http://purl.obolibrary.org/obo/UO_0000027 | 6E3E102A12 | 16-Sep-2019 | 4714 | {'min_max_genes': [809.825, None], 'min_max_ce... | 3951 | 19522.046875 | 8.281977 | 9.879351 | 48.609306 | 0.248997 | 3.904178 | 191.109528 | 0.978942 | 5.258066 | 0.000000 | 0.000000 | 0.000000 | 12819.0 | 4151 | 0.006702 | False | 6 | Neuron | Neuron | 6 | Neuron | {'resolution': 0.2, 'min_dist': 1} | 17 | 0 | Neuron | {'col_celltype': 'annotation_by_markers_indivi... |
| TTTGTGTTCCGTAAAC-1_RRRM2_BRN_FLT_ISS-T_YNG_FY5 | Space Flight | 20 Weeks | RRRM2_BRN_FLT_ISS-T_YNG_FY5 | Mus musculus | NCBITAXON | http://purl.bioontology.org/ontology/NCBITAXON... | C57BL/6NTac | EFO | http://www.ebi.ac.uk/efo/EFO_0020093 | Wild Type | NCIT | http://purl.obolibrary.org/obo/NCIT_C62195 | Taconic Biosciences | Female | MESH | http://purl.bioontology.org/ontology/MESH/D005260 | Space Flight | MESH | http://purl.bioontology.org/ontology/MESH/D013026 | 12 | week | UO | http://purl.obolibrary.org/obo/UO_0000034 | Left cerebral hemisphere | FMA | http://purl.org/sig/ont/fma/fma61819 | Nutrient Upgraded Rodent Food Bar (NuRFB) | ad libitum | 20 | week | UO | http://purl.obolibrary.org/obo/UO_0000034 | Animal Husbandry | Rodent Flight Hardware (Transporter and Habitat) | Hut | 53 | day | UO | http://purl.obolibrary.org/obo/UO_0000033 | 12 h light/dark cycle | sample collection | Bilateral thoracotomy with sedation, Cardiac p... | Cryochiller | OSD | https://osdr.nasa.gov/ | -80 | degree Celsius | UO | http://purl.obolibrary.org/obo/UO_0000027 | 6E3E102A12 | 16-Sep-2019 | 4714 | {'min_max_genes': [809.825, None], 'min_max_ce... | 4371 | 21093.955078 | 8.382976 | 9.956789 | 31.026888 | 0.147089 | 3.466576 | 292.743744 | 1.387809 | 5.682708 | 4.184459 | 0.019837 | 1.645665 | 15502.0 | 4645 | 0.007560 | False | 3 | Neuron | Neuron | 3 | Neuron | {'resolution': 0.2, 'min_dist': 1} | 17 | 0 | Neuron | {'col_celltype': 'annotation_by_markers_indivi... |
| TTTGTGTTCTGTGCAG-1_RRRM2_BRN_FLT_ISS-T_YNG_FY5 | Space Flight | 20 Weeks | RRRM2_BRN_FLT_ISS-T_YNG_FY5 | Mus musculus | NCBITAXON | http://purl.bioontology.org/ontology/NCBITAXON... | C57BL/6NTac | EFO | http://www.ebi.ac.uk/efo/EFO_0020093 | Wild Type | NCIT | http://purl.obolibrary.org/obo/NCIT_C62195 | Taconic Biosciences | Female | MESH | http://purl.bioontology.org/ontology/MESH/D005260 | Space Flight | MESH | http://purl.bioontology.org/ontology/MESH/D013026 | 12 | week | UO | http://purl.obolibrary.org/obo/UO_0000034 | Left cerebral hemisphere | FMA | http://purl.org/sig/ont/fma/fma61819 | Nutrient Upgraded Rodent Food Bar (NuRFB) | ad libitum | 20 | week | UO | http://purl.obolibrary.org/obo/UO_0000034 | Animal Husbandry | Rodent Flight Hardware (Transporter and Habitat) | Hut | 53 | day | UO | http://purl.obolibrary.org/obo/UO_0000033 | 12 h light/dark cycle | sample collection | Bilateral thoracotomy with sedation, Cardiac p... | Cryochiller | OSD | https://osdr.nasa.gov/ | -80 | degree Celsius | UO | http://purl.obolibrary.org/obo/UO_0000027 | 6E3E102A12 | 16-Sep-2019 | 4714 | {'min_max_genes': [809.825, None], 'min_max_ce... | 5541 | 24471.394531 | 8.620111 | 10.105301 | 58.827507 | 0.240393 | 4.091465 | 238.306396 | 0.973816 | 5.477745 | 0.000000 | 0.000000 | 0.000000 | 27623.0 | 5886 | 0.035294 | False | 4 | Neuron | Neuron | 4 | Neuron | {'resolution': 0.2, 'min_dist': 1} | 17 | 0 | Neuron | {'col_celltype': 'annotation_by_markers_indivi... |
| TTTGTTGGTGTTAGCA-1_RRRM2_BRN_FLT_ISS-T_YNG_FY5 | Space Flight | 20 Weeks | RRRM2_BRN_FLT_ISS-T_YNG_FY5 | Mus musculus | NCBITAXON | http://purl.bioontology.org/ontology/NCBITAXON... | C57BL/6NTac | EFO | http://www.ebi.ac.uk/efo/EFO_0020093 | Wild Type | NCIT | http://purl.obolibrary.org/obo/NCIT_C62195 | Taconic Biosciences | Female | MESH | http://purl.bioontology.org/ontology/MESH/D005260 | Space Flight | MESH | http://purl.bioontology.org/ontology/MESH/D013026 | 12 | week | UO | http://purl.obolibrary.org/obo/UO_0000034 | Left cerebral hemisphere | FMA | http://purl.org/sig/ont/fma/fma61819 | Nutrient Upgraded Rodent Food Bar (NuRFB) | ad libitum | 20 | week | UO | http://purl.obolibrary.org/obo/UO_0000034 | Animal Husbandry | Rodent Flight Hardware (Transporter and Habitat) | Hut | 53 | day | UO | http://purl.obolibrary.org/obo/UO_0000033 | 12 h light/dark cycle | sample collection | Bilateral thoracotomy with sedation, Cardiac p... | Cryochiller | OSD | https://osdr.nasa.gov/ | -80 | degree Celsius | UO | http://purl.obolibrary.org/obo/UO_0000027 | 6E3E102A12 | 16-Sep-2019 | 4714 | {'min_max_genes': [809.825, None], 'min_max_ce... | 845 | 5715.998535 | 6.740519 | 8.651199 | 60.961349 | 1.066504 | 4.126511 | 86.941833 | 1.521026 | 4.476676 | 0.000000 | 0.000000 | 0.000000 | 1469.0 | 873 | 0.007560 | False | 13 | Oligodendrocyte | Oligodendrocyte | 13 | Oligodendrocyte | {'resolution': 0.2, 'min_dist': 1} | 17 | 0 | Oligodendrocyte | {'col_celltype': 'annotation_by_markers_indivi... |
| TTTGTTGGTTAAGTGT-1_RRRM2_BRN_FLT_ISS-T_YNG_FY5 | Space Flight | 20 Weeks | RRRM2_BRN_FLT_ISS-T_YNG_FY5 | Mus musculus | NCBITAXON | http://purl.bioontology.org/ontology/NCBITAXON... | C57BL/6NTac | EFO | http://www.ebi.ac.uk/efo/EFO_0020093 | Wild Type | NCIT | http://purl.obolibrary.org/obo/NCIT_C62195 | Taconic Biosciences | Female | MESH | http://purl.bioontology.org/ontology/MESH/D005260 | Space Flight | MESH | http://purl.bioontology.org/ontology/MESH/D013026 | 12 | week | UO | http://purl.obolibrary.org/obo/UO_0000034 | Left cerebral hemisphere | FMA | http://purl.org/sig/ont/fma/fma61819 | Nutrient Upgraded Rodent Food Bar (NuRFB) | ad libitum | 20 | week | UO | http://purl.obolibrary.org/obo/UO_0000034 | Animal Husbandry | Rodent Flight Hardware (Transporter and Habitat) | Hut | 53 | day | UO | http://purl.obolibrary.org/obo/UO_0000033 | 12 h light/dark cycle | sample collection | Bilateral thoracotomy with sedation, Cardiac p... | Cryochiller | OSD | https://osdr.nasa.gov/ | -80 | degree Celsius | UO | http://purl.obolibrary.org/obo/UO_0000027 | 6E3E102A12 | 16-Sep-2019 | 4714 | {'min_max_genes': [809.825, None], 'min_max_ce... | 3268 | 17057.070312 | 8.092239 | 9.744379 | 24.902000 | 0.145992 | 3.254320 | 154.146927 | 0.903713 | 5.044373 | 5.381324 | 0.031549 | 1.853376 | 9258.0 | 3411 | 0.058924 | False | 4 | Neuron | Neuron | 4 | Neuron | {'resolution': 0.2, 'min_dist': 1} | 17 | 0 | Neuron | {'col_celltype': 'annotation_by_markers_indivi... |
103274 rows × 79 columns
Benchmark Integration¶
# if kws_integrate["flavor"] == "scanvi":
# try:
# results_integration_benchmark = scflow.pp.benchmark_integration(
# self.rna, col_sample, col_celltype="annotation_scanvi")
# except Exception as err:
# print(err)
Clustering¶
Build the neighborhood graph, compute the UMAP embedding, and run Leiden clustering on the integrated object (PCA is skipped in this cell via `kws_pca=False`).
%%time
# Clustering Options
# `resolution` is used for Leiden clustering, `min_dist` for the UMAP
# embedding, and `n_neighbors` for the neighborhood graph (see the log
# messages printed by `self.cluster`).
resolution, min_dist = 0.025, 1
# Previously tried (resolution, min_dist) settings, kept for reference:
# resolution, min_dist = 0.027, 1.3
# resolution, min_dist = 0.012, 1.5
# resolution, min_dist = 0.027, 1.3
# resolution, min_dist = 0.035, 1.3
# resolution, min_dist = 0.02, 0.6
# resolution, min_dist = 0.018, 0.7
# resolution, min_dist = 0.012, 0.5
# resolution, min_dist = 0.0115, 1.5
# resolution, min_dist = 0.011, 0.5
n_neighbors = 40
cct = "leiden"  # key added/column name

# Clustering
self.cluster(col_celltype=cct, resolution=resolution, min_dist=min_dist,
             kws_pca=False, layer="scaled",
             kws_neighbors=dict(n_neighbors=n_neighbors))  # cluster

# Store Parameters (one constant-valued column per parameter in `.obs`)
self.rna.obs = self.rna.obs.assign(**{
    f"{cct}_resolution": resolution,
    f"{cct}_n_neighbors": n_neighbors,
    f"{cct}_min_dist": min_dist})

# Plot & Summarize
# `cct` is used everywhere below (instead of a hard-coded "leiden") so that
# changing the key above keeps the plot, the counts, the marker search, and
# the stored cell type column in sync.
_ = self.plot(kind="umap", wspace=0.5, palette="tab20",
              color=[col_batch, cct, col_sample])  # plot UMAP
print(self.rna.obs[cct].value_counts().to_frame("n_cells"))  # N/cluster
print(self.rna.obs.groupby(col_sample).apply(
    lambda x: x[cct].value_counts(),
    include_groups=False).unstack(1))  # by group
self.find_markers(col_celltype=cct)  # DEGs by cluster

# Downstream cells read the active cell type column from these two places
col_celltype = cct
self._info["col_celltype"] = col_celltype
***Building neighborhood with 40 neighbors...
***Embedding UMAP with minimum distance 1...
***Performing Leiden clustering with resolution 0.025...
n_cells
leiden
0 21527
1 19639
5 19269
2 18451
6 8753
4 7160
8 3200
7 3117
3 2158
leiden 0 1 2 3 4 5 6 7 8
sample
RRRM2_BRN_GC_ISS-T_YNG_GY4 610 1491 1219 1237 552 1071 946 190 231
RRRM2_BRN_GC_ISS-T_YNG_GY9 420 1394 908 69 493 1066 731 197 187
RRRM2_BRN_GC_ISS-T_OLD_GO18 490 1114 801 44 559 1432 1039 177 186
RRRM2_BRN_FLT_ISS-T_OLD_FO20 445 833 1242 60 347 1141 500 173 181
RRRM2_BRN_GC_ISS-T_OLD_GO19 591 1619 1365 68 522 1404 1447 255 268
RRRM2_BRN_GC_ISS-T_OLD_GO13 213 960 989 74 341 531 267 132 88
RRRM2_BRN_FLT_ISS-T_YNG_FY8 236 1686 1194 32 422 532 485 219 289
RRRM2_BRN_FLT_ISS-T_YNG_FY7 244 936 1047 38 257 755 102 127 163
RRRM2_BRN_FLT_ISS-T_OLD_FO19 1851 734 762 62 323 703 1047 139 126
RRRM2_BRN_GC_ISS-T_YNG_GY7 312 1491 1205 88 508 851 428 209 173
RRRM2_BRN_FLT_ISS-T_OLD_FO14 352 687 899 31 308 1190 105 149 143
RRRM2_BRN_GC_ISS-T_YNG_GY1 309 893 767 35 319 975 124 131 119
RRRM2_BRN_FLT_ISS-T_YNG_FY2 388 765 896 20 296 1015 26 152 172
RRRM2_BRN_FLT_ISS-T_OLD_FO17 722 94 135 20 39 137 126 21 27
RRRM2_BRN_GC_ISS-T_OLD_GO16 12847 1026 1223 130 536 1564 637 217 206
RRRM2_BRN_FLT_ISS-T_OLD_FO16 301 878 768 20 299 1359 117 130 178
RRRM2_BRN_GC_ISS-T_YNG_GY2 815 2444 2346 109 747 2205 319 354 312
RRRM2_BRN_FLT_ISS-T_YNG_FY5 381 594 685 21 292 1338 307 145 151
CPU times: user 25.5 s, sys: 2.28 s, total: 27.8 s Wall time: 10.5 s
Sub-Clustering¶
Optionally sub-cluster the `subcluster_biggest` largest Leiden clusters.
For instance, if `subcluster_biggest = 3`, the three most abundant clusters (highest cell counts) are each sub-clustered. Set `subcluster_biggest = False` to skip this step.
# Optional sub-clustering of the most abundant Leiden clusters.
# `subcluster_biggest` (defined outside this cell) is either False (skip) or
# the number of largest clusters to sub-cluster.
# NOTE(review): block indentation was lost in this export. The statements after
# the `if` line are presumably its body and the `sc.tl.leiden` call is
# presumably the body of the `for` loop -- confirm against the notebook, in
# particular whether the last two statements sit inside or outside the `if`.
resolution_sub = None if subcluster_biggest is False else 0.005
if subcluster_biggest is not False:
col_celltype = "leiden_subcluster"
# `value_counts` orders labels by descending cell count, so slicing the index
# keeps the most abundant clusters (capped at the number of clusters present).
biggest_clusters = self.rna.obs["leiden"].value_counts().index.values[
:min(len(self.rna.obs["leiden"].unique()), subcluster_biggest)]
# Start from a copy of the original labels; each pass below re-clusters one
# of the selected clusters and writes the result back to the same column.
self.rna.obs.loc[:, col_celltype] = self.rna.obs.leiden.copy()
for x in biggest_clusters:
# NOTE(review): `seed` is not among the documented keywords of `sc.tl.leiden`
# (the documented one is `random_state`) -- confirm it has the intended effect.
sc.tl.leiden(self.rna, restrict_to=(col_celltype, [x]),
key_added=col_celltype, resolution=resolution_sub,
seed=0, n_iterations=-1, use_weights=True)
self.find_markers(col_celltype=col_celltype) # DEGs by cluster
self.plot(kind="umap", color=col_celltype)
# Cell counts per sample (rows) and sub-cluster (columns)
print(self.rna.obs[[col_sample, col_celltype]].groupby(
col_sample).value_counts().unstack(1))
# Record the sub-clustering resolution and the active cell type column
self.rna.obs.loc[:, f"resolution_{col_celltype}"] = resolution_sub
self._info["col_celltype"] = col_celltype
Annotate¶
Annotate cell types with various methods
Examine Marker Genes¶
This code looks at top markers by log2fold-change and adjusted p-value cutoffs and sorts by adjusted p-values.
Use kind = ["heat", "dot"] to get dot plots too.
# Top markers per cluster, filtered by adjusted p-value and log2 fold-change
marker_filters = dict(
    n_genes=15,
    col_celltype=col_celltype,
    p_threshold=1e-10,
    log2fc_threshold=1.5,
    log2fc_threshold_abs=False,
)
markers_df = self.get_markers_df(**marker_filters)


def _gene_names(cluster_markers):
    """Return the marker gene names of one cluster as a plain list."""
    return list(cluster_markers.reset_index()["names"])


# {cluster label: [gene, ...]}, the format passed to `self.plot(genes=...)`
genes_by_cluster = markers_df.groupby(col_celltype).apply(_gene_names)
markers_dict = dict(genes_by_cluster)

# Heatmap of the selected markers
_ = self.plot(
    genes=markers_dict,
    kind="heat",
    layer="scaled",
    standard_scale="obs",
    figsize=(15, 15),
    vmin=0,
    vmax=0.5,
)
markers_df
WARNING: Gene labels are not shown when more than 50 genes are visualized. To show gene labels set `show_gene_labels=True`
| scores | logfoldchanges | pvals | pvals_adj | ||
|---|---|---|---|---|---|
| leiden | names | ||||
| 5 | Smurf1 | 52.179630 | 1.787631 | 0.000000e+00 | 0.000000e+00 |
| Arl3 | 52.371861 | 1.716211 | 0.000000e+00 | 0.000000e+00 | |
| Pfkm | 52.359268 | 1.858708 | 0.000000e+00 | 0.000000e+00 | |
| Usf3 | 52.342121 | 1.884558 | 0.000000e+00 | 0.000000e+00 | |
| Ccn3 | 52.329323 | 3.249097 | 0.000000e+00 | 0.000000e+00 | |
| Gm12371 | 52.299938 | 3.280986 | 0.000000e+00 | 0.000000e+00 | |
| Ubac2 | 52.276756 | 1.724732 | 0.000000e+00 | 0.000000e+00 | |
| Entr1 | 52.269951 | 1.826661 | 0.000000e+00 | 0.000000e+00 | |
| Tmem151a | 52.265415 | 1.772315 | 0.000000e+00 | 0.000000e+00 | |
| Nceh1 | 52.231071 | 1.787486 | 0.000000e+00 | 0.000000e+00 | |
| Pls3 | 52.228706 | 1.889730 | 0.000000e+00 | 0.000000e+00 | |
| Mrpl41 | 52.227352 | 1.842142 | 0.000000e+00 | 0.000000e+00 | |
| Ncs1 | 52.216713 | 1.837653 | 0.000000e+00 | 0.000000e+00 | |
| Adk | 52.202930 | 1.626228 | 0.000000e+00 | 0.000000e+00 | |
| Btbd7 | 52.185795 | 1.655978 | 0.000000e+00 | 0.000000e+00 | |
| 1 | Ipo9 | 49.887600 | 1.806189 | 0.000000e+00 | 0.000000e+00 |
| Kif16b | 49.997070 | 1.691769 | 0.000000e+00 | 0.000000e+00 | |
| Slc39a6 | 49.989765 | 2.303841 | 0.000000e+00 | 0.000000e+00 | |
| Ssb | 49.983730 | 1.716834 | 0.000000e+00 | 0.000000e+00 | |
| Ubac2 | 49.975971 | 1.678680 | 0.000000e+00 | 0.000000e+00 | |
| 9330159M07Rik | 49.970547 | 2.704767 | 0.000000e+00 | 0.000000e+00 | |
| Erp29 | 49.963879 | 1.781210 | 0.000000e+00 | 0.000000e+00 | |
| Gpc1 | 49.954193 | 2.234042 | 0.000000e+00 | 0.000000e+00 | |
| Reps1 | 49.947380 | 1.714474 | 0.000000e+00 | 0.000000e+00 | |
| Vstm5 | 49.922413 | 2.318811 | 0.000000e+00 | 0.000000e+00 | |
| Mapkap1 | 49.919682 | 1.612838 | 0.000000e+00 | 0.000000e+00 | |
| Tmem169 | 49.919678 | 2.276560 | 0.000000e+00 | 0.000000e+00 | |
| Clpx | 49.903904 | 1.756471 | 0.000000e+00 | 0.000000e+00 | |
| Map3k7 | 49.903728 | 1.696558 | 0.000000e+00 | 0.000000e+00 | |
| Btaf1 | 49.999718 | 1.586228 | 0.000000e+00 | 0.000000e+00 | |
| 0 | Nrg3 | 142.452026 | 4.107781 | 0.000000e+00 | 0.000000e+00 |
| Dpp10 | 138.951385 | 4.910851 | 0.000000e+00 | 0.000000e+00 | |
| Kcnip4 | 138.293945 | 4.104700 | 0.000000e+00 | 0.000000e+00 | |
| Rbfox1 | 137.393112 | 3.926455 | 0.000000e+00 | 0.000000e+00 | |
| Celf2 | 131.670334 | 3.144436 | 0.000000e+00 | 0.000000e+00 | |
| Csmd1 | 128.779266 | 3.637968 | 0.000000e+00 | 0.000000e+00 | |
| Nrxn3 | 125.969872 | 3.488383 | 0.000000e+00 | 0.000000e+00 | |
| Nrg1 | 113.331749 | 3.747945 | 0.000000e+00 | 0.000000e+00 | |
| Syt1 | 112.355385 | 3.356306 | 0.000000e+00 | 0.000000e+00 | |
| Meg3 | 111.013176 | 3.171908 | 0.000000e+00 | 0.000000e+00 | |
| Sgcz | 100.021637 | 3.458776 | 0.000000e+00 | 0.000000e+00 | |
| Fgf14 | 99.979164 | 2.813265 | 0.000000e+00 | 0.000000e+00 | |
| Ptprd | 97.597748 | 1.844778 | 0.000000e+00 | 0.000000e+00 | |
| Cntnap2 | 93.390694 | 2.978568 | 0.000000e+00 | 0.000000e+00 | |
| Snhg11 | 90.035690 | 2.944553 | 0.000000e+00 | 0.000000e+00 | |
| 2 | Kcnk13 | 59.093266 | 3.053705 | 0.000000e+00 | 0.000000e+00 |
| Ywhaq | 56.939629 | 2.365501 | 0.000000e+00 | 0.000000e+00 | |
| App | 57.157963 | 1.881855 | 0.000000e+00 | 0.000000e+00 | |
| Mboat1 | 57.354801 | 4.658205 | 0.000000e+00 | 0.000000e+00 | |
| Josd2 | 57.376133 | 2.848171 | 0.000000e+00 | 0.000000e+00 | |
| Kif6 | 57.394062 | 3.345257 | 0.000000e+00 | 0.000000e+00 | |
| Sh3tc2 | 57.528534 | 6.165985 | 0.000000e+00 | 0.000000e+00 | |
| Cipc | 57.682247 | 2.468949 | 0.000000e+00 | 0.000000e+00 | |
| Tcf12 | 57.921982 | 2.065799 | 0.000000e+00 | 0.000000e+00 | |
| Shtn1 | 58.287922 | 2.404106 | 0.000000e+00 | 0.000000e+00 | |
| Kctd3 | 58.620754 | 2.627975 | 0.000000e+00 | 0.000000e+00 | |
| Tsc22d3 | 59.000103 | 3.219590 | 0.000000e+00 | 0.000000e+00 | |
| Rhog | 56.667194 | 4.276067 | 0.000000e+00 | 0.000000e+00 | |
| Lgi3 | 59.205410 | 2.762129 | 0.000000e+00 | 0.000000e+00 | |
| Selenop | 59.312222 | 3.112374 | 0.000000e+00 | 0.000000e+00 | |
| 8 | Tgfbr1 | 137.340317 | 10.769904 | 0.000000e+00 | 0.000000e+00 |
| Hexb | 119.997032 | 8.486044 | 0.000000e+00 | 0.000000e+00 | |
| Mertk | 116.801888 | 9.555871 | 0.000000e+00 | 0.000000e+00 | |
| Cst3 | 112.845932 | 7.582945 | 0.000000e+00 | 0.000000e+00 | |
| Inpp5d | 111.094070 | 12.416537 | 0.000000e+00 | 0.000000e+00 | |
| Srgap2 | 110.366783 | 6.584876 | 0.000000e+00 | 0.000000e+00 | |
| Dock8 | 82.764763 | 11.058674 | 0.000000e+00 | 0.000000e+00 | |
| Ly86 | 77.547310 | 10.962577 | 0.000000e+00 | 0.000000e+00 | |
| Mef2a | 77.216774 | 5.439650 | 0.000000e+00 | 0.000000e+00 | |
| Dock2 | 75.843651 | 10.114684 | 0.000000e+00 | 0.000000e+00 | |
| Fyb | 75.524727 | 10.855298 | 0.000000e+00 | 0.000000e+00 | |
| Qk | 63.830379 | 3.882025 | 0.000000e+00 | 0.000000e+00 | |
| Gpr34 | 63.233471 | 9.749660 | 0.000000e+00 | 0.000000e+00 | |
| Lrmda | 63.180691 | 6.527174 | 0.000000e+00 | 0.000000e+00 | |
| Ccnd3 | 62.310600 | 7.042611 | 0.000000e+00 | 0.000000e+00 | |
| 7 | Ptprz1 | 280.503510 | 9.448026 | 0.000000e+00 | 0.000000e+00 |
| Lhfpl3 | 263.645355 | 8.650301 | 0.000000e+00 | 0.000000e+00 | |
| Sox6 | 257.039917 | 10.220982 | 0.000000e+00 | 0.000000e+00 | |
| Grid2 | 142.837418 | 3.889154 | 0.000000e+00 | 0.000000e+00 | |
| Npas3 | 138.383698 | 6.207528 | 0.000000e+00 | 0.000000e+00 | |
| Epn2 | 130.680801 | 6.545508 | 0.000000e+00 | 0.000000e+00 | |
| Sox2ot | 130.439926 | 7.050704 | 0.000000e+00 | 0.000000e+00 | |
| Sgcd | 129.128464 | 6.576615 | 0.000000e+00 | 0.000000e+00 | |
| Slc35f1 | 125.385567 | 5.852885 | 0.000000e+00 | 0.000000e+00 | |
| Pdgfra | 124.127045 | 12.550479 | 0.000000e+00 | 0.000000e+00 | |
| Qk | 120.170456 | 4.371033 | 0.000000e+00 | 0.000000e+00 | |
| Nrxn1 | 112.409256 | 3.198757 | 0.000000e+00 | 0.000000e+00 | |
| Adgrl3 | 89.407158 | 2.946114 | 0.000000e+00 | 0.000000e+00 | |
| Megf11 | 88.544106 | 7.980099 | 0.000000e+00 | 0.000000e+00 | |
| Ntm | 87.639534 | 3.522771 | 0.000000e+00 | 0.000000e+00 | |
| 6 | Agap2 | 50.225891 | 2.618914 | 0.000000e+00 | 0.000000e+00 |
| Ahcyl2 | 50.588196 | 2.080101 | 0.000000e+00 | 0.000000e+00 | |
| Sh3kbp1 | 50.558674 | 2.299337 | 0.000000e+00 | 0.000000e+00 | |
| Cd47 | 50.503082 | 2.158016 | 0.000000e+00 | 0.000000e+00 | |
| Pten | 50.455803 | 2.041422 | 0.000000e+00 | 0.000000e+00 | |
| Ttyh1 | 50.349094 | 2.331339 | 0.000000e+00 | 0.000000e+00 | |
| Gm50024 | 50.348148 | 4.869073 | 0.000000e+00 | 0.000000e+00 | |
| Cux1 | 50.324490 | 2.190573 | 0.000000e+00 | 0.000000e+00 | |
| Far1 | 50.277599 | 2.102910 | 0.000000e+00 | 0.000000e+00 | |
| Prelid3a | 50.617641 | 2.529395 | 0.000000e+00 | 0.000000e+00 | |
| Ncdn | 51.535789 | 2.577785 | 0.000000e+00 | 0.000000e+00 | |
| Tpm1 | 51.532272 | 2.544296 | 0.000000e+00 | 0.000000e+00 | |
| Fkbp1a | 51.465649 | 2.445119 | 0.000000e+00 | 0.000000e+00 | |
| Nfx1 | 51.442070 | 2.360921 | 0.000000e+00 | 0.000000e+00 | |
| C2cd2l | 51.227360 | 2.870124 | 0.000000e+00 | 0.000000e+00 | |
| 3 | Zbtb20 | 114.934196 | 4.832954 | 0.000000e+00 | 0.000000e+00 |
| Cpq | 40.541790 | 5.582988 | 2.805311e-268 | 3.491339e-267 | |
| Utrn | 36.233761 | 4.225976 | 4.815009e-226 | 5.067797e-225 | |
| Airn | 35.226917 | 5.072333 | 1.722178e-215 | 1.727697e-214 | |
| Phldb2 | 33.469276 | 5.959682 | 2.617537e-198 | 2.432079e-197 | |
| Cped1 | 33.102448 | 6.261213 | 9.003496e-195 | 8.252529e-194 | |
| Pard3b | 32.873741 | 4.236024 | 4.643844e-193 | 4.224791e-192 | |
| Sdk1 | 32.677834 | 4.139126 | 2.488597e-191 | 2.242308e-190 | |
| Kcnh8 | 32.520973 | 5.330007 | 2.456451e-189 | 2.185919e-188 | |
| Rora | 31.530457 | 2.573417 | 1.057188e-181 | 8.957121e-181 | |
| 9530026P05Rik | 31.518534 | 4.679823 | 5.832826e-180 | 4.928234e-179 | |
| Esr1 | 29.709341 | 5.484935 | 5.565380e-163 | 4.344484e-162 | |
| Hmcn1 | 28.951281 | 6.202574 | 4.982675e-156 | 3.738997e-155 | |
| Ptprk | 27.131479 | 2.951380 | 1.800068e-140 | 1.237446e-139 | |
| Greb1 | 26.736805 | 6.318375 | 2.751495e-136 | 1.829684e-135 | |
| 4 | Aldh1l1 | 47.545361 | 5.246835 | 0.000000e+00 | 0.000000e+00 |
| Fmn2 | 46.399601 | 2.557196 | 0.000000e+00 | 0.000000e+00 | |
| Hif3a | 46.974819 | 5.359393 | 0.000000e+00 | 0.000000e+00 | |
| Nrcam | 47.036221 | 2.432620 | 0.000000e+00 | 0.000000e+00 | |
| Prrx1 | 47.117741 | 5.187209 | 0.000000e+00 | 0.000000e+00 | |
| Ston2 | 47.203751 | 3.795210 | 0.000000e+00 | 0.000000e+00 | |
| Psd2 | 47.237278 | 3.946349 | 0.000000e+00 | 0.000000e+00 | |
| Folh1 | 47.321003 | 3.813559 | 0.000000e+00 | 0.000000e+00 | |
| Selenop | 47.366066 | 3.567869 | 0.000000e+00 | 0.000000e+00 | |
| Adhfe1 | 46.309509 | 5.024072 | 0.000000e+00 | 0.000000e+00 | |
| Timp3 | 47.647545 | 3.607164 | 0.000000e+00 | 0.000000e+00 | |
| Gna13 | 47.824993 | 3.409621 | 0.000000e+00 | 0.000000e+00 | |
| Vegfa | 47.843952 | 3.688154 | 0.000000e+00 | 0.000000e+00 | |
| Msmo1 | 48.290367 | 3.307666 | 0.000000e+00 | 0.000000e+00 | |
| Son | 48.297272 | 2.408797 | 0.000000e+00 | 0.000000e+00 |
Annotate by Marker Gene Overlap¶
Example of the Expected Marker Definition Format
# Illustrative example only: each key is a cell type label and each value is
# the set of marker gene symbols defining that cell type.
markers_predefined = {
"CD4 T cells": {"IL7R"},
"CD14+ Monocytes": {"CD14", "LYZ"},
"B cells": {"MS4A1"},
"CD8 T cells": {"CD8A"},
"NK cells": {"GNLY", "NKG7"},
"FCGR3A+ Monocytes": {"FCGR3A", "MS4A7"},
"Dendritic Cells": {"FCER1A", "CST3"},
"Megakaryocytes": {"PPBP"},
}
# Annotate clusters by overlap between their top markers and the
# predefined marker sets in `mks_collapsed`
col_overlap = "annotation_by_overlap"
marker_matches = self.annotate(
    mks_collapsed,
    col_celltype=col_celltype,
    col_celltype_new=col_overlap,
    # Alternative (not used): celltypes_superhierarchical=celltypes_superhierarchical
    top_n_markers=50,  # can only have this one or `adj_pval_threshold`
    # Alternative (not used): adj_pval_threshold=1e-10
    method="overlap_count",  # alternative (not used): method="overlap_coef"
    overwrite=True,
)

# Apply any user-specified label renaming
if rename_marker_based_annotation is not None:
    relabeled = self.rna.obs[col_overlap].replace(
        rename_marker_based_annotation)
    self.rna.obs.loc[:, col_overlap] = relabeled

# Plot the annotation on the UMAP
self.plot(kind="umap", color=col_overlap, wspace=0.4)

# Percentage of cells per annotation label
pct_per_label = self.rna.obs[col_overlap].value_counts(normalize=True) * 100
print(round(pct_per_label, 2))

# Percentage of cells per (cluster, annotation label) pair, ascending
pct_per_pair = self.rna.obs[[col_celltype, col_overlap]].value_counts(
    normalize=True) * 100
round(pct_per_pair, 2).sort_values()
annotation_by_overlap Neuron 66.99 Oligodendrocyte 17.87 Astrocyte 6.93 Microglial 3.10 OPC 3.02 Endothelial 2.09 Name: proportion, dtype: float64
leiden annotation_by_overlap 3 Endothelial 2.09 7 OPC 3.02 8 Microglial 3.10 4 Astrocyte 6.93 6 Neuron 8.48 2 Oligodendrocyte 17.87 5 Neuron 18.66 1 Neuron 19.02 0 Neuron 20.84 Name: proportion, dtype: float64
Annotate with ToppGene¶
# Options
min_genes = 2  # minimum markers that have to overlap between Leiden & atlas
# Substrings/patterns passed to `scflow.pp.annotate_by_toppgene` as
# `remove_strings`.
# NOTE(review): if these are applied as regular expressions, the unescaped
# parentheses in "-Glut_E_(THEMIS)" form a group rather than matching literal
# parentheses -- confirm against scflow.
remove_strings = ["----L1-6",  # "---[|]M.*",
                  "facs-", "-nan-",
                  # "-i_Gaba_3-.*",
                  "Brain_organoid-organoid_Kanton_Nature-Organoid-..-",
                  # "Non-neuronal-Macroglial-((^|)(Oligo|Astro))+-",
                  # "-Glut_E.*IL7R",
                  "cells hierarchy compared to all cells using T-S.*",
                  ".*-organoid_Tanaka_cellReport-.+-",
                  "...BrainAtlas -.*", "-eN2.*", "...Sample groups.*",
                  "...Sample Type, Dataset.*",
                  "-Neuronal",
                  " // Primary Cells by Cluster",
                  ".World...Primary Cells by Cluster",
                  "Brain_organoid-organoid_Velasco_nature-6_",
                  "Fetal_brain-fetalBrain_Zhong_nature-....-",
                  "Somatosensory_Cortex_....-Neuronal-",
                  "Non-neuronal-Non-dividing-",
                  "...Sample groups..6 Anatomical region groups., with 5.*",
                  "Brain_organoid-organoid_Paulsen_bioRxiv-",
                  "-Glut_E_(THEMIS)", "[(]THEMIS[)]",  # "[|].*",
                  "- method, tissue, subtissue, age, lineage.*"]
# Results whose name contains any of these (case-insensitive) are discarded
drop_name_patterns = ["striatum", "globus", "Entopeduncular",
                      "Substantia_nigra-", "Thalamus-"]
# {harmonized label: [regex, ...]}; a name matching any regex of a label is
# renamed to that label (several matching labels are joined with " | ")
toppgene_rename_by_pattern = dict(
    Inhibitory=["Inh(_|ib)", "GABA"], Excitatory=["Excit", "Glut"],
    # Inhibitory=["Inh(_|ib)"], Excitatory=["Excit"],
    # # Gabaergic=["GABA"], Glutamatergic=["Glut"],
    Astrocyte=["Astrocyte", "Astroglia", "Macroglial-Astro"],
    Microglial=["Microglia", "Micro"],
    Endothelial=["Endothelial"],
    Oligodendrocyte=[r"^(?=.*oligo)(?!.*poly)(?!.*opc).*"],
    OPC=["Polydendrocyte", "OPC"])
# Names to remove if the (cleaned) name is just a region or a top-level
# hierarchical/undesired type. Every region is dropped both as "X" and as
# "X|X"; generating the doubled form avoids missing one (e.g.,
# "Substantia_nigra|Substantia_nigra" previously slipped through).
region_names = [
    "Mid-temporal_gyrus_(MTG)", "primary_auditory_cortex_(A1C)",
    "Somatosensory_Cortex_(S1)", "Anterior_Cingulate_gyrus_(CgG)",
    "Primary_Motor_Cortex_(M1)", "Primary_Visual_cortex_(V1C)",
    "Substantia_nigra", "Thalamus", "Hippocampus", "Frontal_cortex"]
other_undesired_names = [
    r"Neuronal|World / ", "mon",
    "BMP_responsible_cell|6m", "bearing_cell|6m", "bearing_cell|GW16", "11",
    "Non-neuronal-Non-dividing"]
drop_regions = (region_names + [f"{i}|{i}" for i in region_names]
                + other_undesired_names)
drop_regions = drop_regions + [f"{i}-Non-neuronal" for i in drop_regions]

# Query ToppGene
results_toppgene = scflow.pp.annotate_by_toppgene(
    markers_dict, remove_strings=remove_strings,
    species=species, min_genes=min_genes, source_patterns=source_patterns)

# Remove or Alter Certain Name Patterns
drop_patterns_lower = [i.lower() for i in drop_name_patterns]
keep_names = results_toppgene.Name.apply(
    lambda x: not any(i in x.lower() for i in drop_patterns_lower))
results_toppgene = results_toppgene[keep_names]

# Compile the rename patterns once (case-insensitive) instead of per row
_rename_patterns = {
    label: [re.compile(i, re.IGNORECASE) for i in patterns]
    for label, patterns in toppgene_rename_by_pattern.items()}


def _harmonized_label(name):
    """Return the " | "-joined labels whose patterns match `name` ("" if none)."""
    return " | ".join(
        label for label, patterns in _rename_patterns.items()
        if any(p.search(name) is not None for p in patterns))


rn_tg = {name: _harmonized_label(name)
         for name in results_toppgene.Name.unique()}
rn_tg = {name: label for name, label in rn_tg.items()
         if label != ""}  # renaming guide (unmatched names are left as-is)
results_toppgene = results_toppgene.assign(
    Name=results_toppgene.Name.map(lambda x: rn_tg.get(x, x)))
results_toppgene = results_toppgene[~results_toppgene.Name.isin(drop_regions)]


# Map Labels (Plurality Vote If Sufficient or Top)
def _plurality_or_top(names, n_top=10, min_share=0.25):
    """Pick the most frequent name among the first `n_top` hits if its share
    is at least `min_share`; otherwise fall back to the first hit."""
    head = names.iloc[:n_top]
    shares = head.value_counts(normalize=True)
    if shares.iloc[0] >= min_share:
        return shares.index.values[0]
    # Positional access: `names[0]` would be a label lookup on an index that
    # has no integer labels (deprecated positional fallback in pandas 2.x)
    return names.iloc[0]


top_cs = dict(results_toppgene.groupby("Gene Set")["Name"].apply(
    _plurality_or_top))
print("\n".join([f"{k}: {top_cs[k]}" for k in top_cs]), "\n\n")
if "annotation_toppgene" in self.rna.obs:
    self.rna.obs = self.rna.obs.drop("annotation_toppgene", axis=1)
# NOTE: clusters without any ToppGene result are absent from `top_cs`, so
# `replace` leaves their cluster ID as the "annotation_toppgene" value.
self.rna.obs = self.rna.obs.join(self.rna.obs[col_celltype].replace(
    top_cs).to_frame("annotation_toppgene")).loc[self.rna.obs.index]

# Display Results
if "annotation_by_overlap" in self.rna.obs:
    print(round(self.rna.obs[["annotation_toppgene", "annotation_by_overlap"]
                             ].value_counts(normalize=True).sort_index(
                                 ) * 100, 2), "\n\n")
print(round(self.rna.obs["annotation_toppgene"].value_counts(
    normalize=True) * 100, 2))
results_toppgene.reset_index("ID", drop=True).drop([
    "QValueBonferroni", "QValueFDRBY", "QValueFDRBH",
    "TotalGenes", "Genes"], axis=1)
Server returned HTTP status code: 400
Content: {'error': 'No valid lookup found for symbol Qk'}
Server returned HTTP status code: 400
Content: {'error': 'No valid lookup found for symbol Fyb'}
Server returned HTTP status code: 400
Content: {'error': 'No valid lookup found for symbol Qk'}
0: OPC
2: Oligodendrocyte
3: Excitatory
4: Astrocyte
6: Excitatory
7: OPC
8: Microglial
annotation_toppgene annotation_by_overlap
OPC Neuron 20.84
OPC 3.02
1 Neuron 19.02
Oligodendrocyte Oligodendrocyte 17.87
Excitatory Neuron 8.48
Endothelial 2.09
Astrocyte Astrocyte 6.93
5 Neuron 18.66
Microglial Microglial 3.10
Name: proportion, dtype: float64
annotation_toppgene
OPC 23.86
1 19.02
5 18.66
Oligodendrocyte 17.87
Excitatory 10.57
Astrocyte 6.93
Microglial 3.10
Name: proportion, dtype: float64
| Name | PValue | GenesInTerm | GenesInQuery | GenesInTermInQuery | Source | Name_Original | percent_atlas_genes_in_query | Symbols | ||
|---|---|---|---|---|---|---|---|---|---|---|
| Gene Set | Category | |||||||||
| 0 | ToppCell | Excitatory | 0.0 | 195 | 14 | 9 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Visual_cortex_(V1C)-Neuronal-Glutamate... | 4.615385 | [CELF2, SYT1, NRG1, KCNIP4, MEG3, RBFOX1, PTPR... |
| ToppCell | OPC | 0.0 | 200 | 14 | 6 | Human Adult Brain MTG | Macroglial-Polydendrocytes-PDGFRA---|Macroglia... | 3.0 | [KCNIP4, SGCZ, FGF14, MEG3, RBFOX1, CSMD1] | |
| ToppCell | OPC | 0.0 | 200 | 14 | 6 | Human Adult Brain MTG | Macroglial-Polydendrocytes-PDGFRA-|Macroglial ... | 3.0 | [KCNIP4, SGCZ, FGF14, MEG3, RBFOX1, CSMD1] | |
| ToppCell | OPC | 0.0 | 200 | 14 | 6 | Human Adult Brain MTG | Macroglial-Polydendrocytes-PDGFRA--|Macroglial... | 3.0 | [KCNIP4, SGCZ, FGF14, MEG3, RBFOX1, CSMD1] | |
| ToppCell | OPC | 0.0 | 200 | 14 | 6 | Human Adult Brain MTG | Macroglial-Polydendrocytes-PDGFRA----L1-6|Macr... | 3.0 | [KCNIP4, SGCZ, FGF14, MEG3, RBFOX1, CSMD1] | |
| ToppCell | OPC | 0.0 | 200 | 14 | 6 | Human Adult Brain MTG | Macroglial-Polydendrocytes-PDGFRA|Macroglial /... | 3.0 | [KCNIP4, SGCZ, FGF14, MEG3, RBFOX1, CSMD1] | |
| ToppCell | OPC | 0.0 | 200 | 14 | 6 | Human Adult Brain MTG | Macroglial-Polydendrocytes|Macroglial / cells ... | 3.0 | [KCNIP4, SGCZ, FGF14, MEG3, RBFOX1, CSMD1] | |
| ToppCell | Substantia_nigra|Substantia_nigra | 0.0 | 186 | 14 | 5 | Mouse Adult Brain Overview (690k cells, 9 regi... | Substantia_nigra-Neuronal|Substantia_nigra / B... | 2.688172 | [SYT1, KCNIP4, MEG3, NRXN3, CNTNAP2] | |
| ToppCell | Excitatory | 0.0 | 192 | 14 | 5 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Motor_Cortex_(M1)-Neuronal-Glutamaterg... | 2.604167 | [CELF2, NRG1, KCNIP4, DPP10, PTPRD] | |
| ToppCell | Inhibitory | 0.0 | 194 | 14 | 5 | Comparison of human cortex and organoids | Neuron-Postmitotic-Inhibitory_Neuron-MGE2|Worl... | 2.57732 | [SYT1, FGF14, MEG3, NRXN3, CNTNAP2] | |
| ToppCell | Inhibitory | 0.0 | 194 | 14 | 5 | Comparison of human cortex and organoids | Neuron-Postmitotic-Inhibitory_Neuron-MGE2-10|W... | 2.57732 | [SYT1, FGF14, MEG3, NRXN3, CNTNAP2] | |
| ToppCell | Neuron | 0.0 | 199 | 14 | 5 | Comparison of human cortex and organoids | Neuron|World / Primary Cells by Cluster | 2.512563 | [CELF2, SYT1, MEG3, RBFOX1, PTPRD] | |
| ToppCell | Neuron-Postmitotic | 0.0 | 199 | 14 | 5 | Comparison of human cortex and organoids | Neuron-Postmitotic|World / Primary Cells by Cl... | 2.512563 | [CELF2, SYT1, MEG3, RBFOX1, PTPRD] | |
| ToppCell | Excitatory | 0.0 | 200 | 14 | 5 | Comparison of human cortex and organoids | Neuron-Postmitotic-Excitatory_Neuron_-Upper_La... | 2.5 | [CELF2, SYT1, MEG3, RBFOX1, PTPRD] | |
| ToppCell | Fetal_brain-fetalBrain_Zhong_nature-GW16|GW16 | 0.0 | 200 | 14 | 5 | Integration of Four Brain Organoid Datasets an... | Fetal_brain-fetalBrain_Zhong_nature-GW16-Neuro... | 2.5 | [SYT1, MEG3, RBFOX1, PTPRD, CNTNAP2] | |
| ToppCell | Excitatory | 0.0 | 200 | 14 | 5 | Comparison of human cortex and organoids | Neuron-Postmitotic-Excitatory_Neuron_-Upper_La... | 2.5 | [CELF2, SYT1, MEG3, RBFOX1, PTPRD] | |
| ToppCell | Cortical_neuron|GW09 | 0.0 | 200 | 14 | 5 | Integration of Four Brain Organoid Datasets an... | Fetal_brain-organoid_Tanaka_cellReport-GW09-Ne... | 2.5 | [CELF2, SYT1, RBFOX1, PTPRD, CNTNAP2] | |
| ToppCell | Fetal_brain-fetalBrain_Zhong_nature-GW23|GW23 | 0.0 | 112 | 14 | 4 | Integration of Four Brain Organoid Datasets an... | Fetal_brain-fetalBrain_Zhong_nature-GW23-Neuro... | 3.571429 | [SYT1, MEG3, NRXN3, RBFOX1] | |
| 2 | ToppCell | Oligodendrocyte | 0.0 | 190 | 15 | 4 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Motor_Cortex_(M1)-Non-neuronal-Macrogl... | 2.105263 | [SH3TC2, SELENOP, KIF6, MBOAT1] |
| ToppCell | OPC | 0.000008 | 119 | 15 | 3 | Mouse Adult Brain Overview (690k cells, 9 regi... | Hippocampus-Macroglia-POLYDENDROCYTE-P1-P1_1-T... | 2.521008 | [JOSD2, SH3TC2, YWHAQ] | |
| ToppCell | OPC | 0.000008 | 119 | 15 | 3 | Mouse Adult Brain Overview (690k cells, 9 regi... | Hippocampus-Macroglia-POLYDENDROCYTE-P1-P1_1|H... | 2.521008 | [JOSD2, SH3TC2, YWHAQ] | |
| ToppCell | OPC | 0.000008 | 119 | 15 | 3 | Mouse Adult Brain Overview (690k cells, 9 regi... | Hippocampus-Macroglia-POLYDENDROCYTE-P1-P1_1-T... | 2.521008 | [JOSD2, SH3TC2, YWHAQ] | |
| ToppCell | Oligodendrocyte | 0.000009 | 124 | 15 | 3 | Mouse Adult Brain Overview (690k cells, 9 regi... | Posterior_cortex-Macroglia-OLIGODENDROCYTE|Pos... | 2.419355 | [SH3TC2, KCNK13, MBOAT1] | |
| ToppCell | Oligodendrocyte | 0.000011 | 132 | 15 | 3 | Mouse Adult Brain Overview (690k cells, 9 regi... | Hippocampus-Macroglia-OLIGODENDROCYTE-O2-Trf|H... | 2.272727 | [JOSD2, SH3TC2, KCNK13] | |
| ToppCell | Oligodendrocyte | 0.000011 | 132 | 15 | 3 | Mouse Adult Brain Overview (690k cells, 9 regi... | Hippocampus-Macroglia-OLIGODENDROCYTE-O2|Hippo... | 2.272727 | [JOSD2, SH3TC2, KCNK13] | |
| ToppCell | Oligodendrocyte | 0.000012 | 137 | 15 | 3 | Mouse Adult Brain Overview (690k cells, 9 regi... | Hippocampus-Macroglia-OLIGODENDROCYTE|Hippocam... | 2.189781 | [JOSD2, SH3TC2, KCNK13] | |
| ToppCell | Oligodendrocyte | 0.000029 | 183 | 15 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Visual_cortex_(V1C)-Non-neuronal-Macro... | 1.639344 | [SH3TC2, SELENOP, KIF6] | |
| ToppCell | Oligodendrocyte | 0.00003 | 184 | 15 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Visual_cortex_(V1C)-Non-neuronal-Macro... | 1.630435 | [SH3TC2, SELENOP, KIF6] | |
| ToppCell | Oligodendrocyte | 0.00003 | 185 | 15 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Somatosensory_Cortex_(S1)-Non-neuronal-Macrogl... | 1.621622 | [SH3TC2, SELENOP, KIF6] | |
| ToppCell | Oligodendrocyte | 0.00003 | 185 | 15 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Somatosensory_Cortex_(S1)-Non-neuronal-Macrogl... | 1.621622 | [SH3TC2, SELENOP, KIF6] | |
| ToppCell | Oligodendrocyte | 0.00003 | 186 | 15 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Visual_cortex_(V1C)-Non-neuronal-Macro... | 1.612903 | [SH3TC2, SELENOP, MBOAT1] | |
| ToppCell | Oligodendrocyte | 0.000031 | 187 | 15 | 3 | Human Adult Multiple Cortical Areas SMART-seq | primary_auditory_cortex_(A1C)-Non-neuronal-Mac... | 1.604278 | [SH3TC2, SELENOP, KIF6] | |
| ToppCell | Oligodendrocyte | 0.000031 | 187 | 15 | 3 | Human Adult Multiple Cortical Areas SMART-seq | primary_auditory_cortex_(A1C)-Non-neuronal-Mac... | 1.604278 | [SH3TC2, SELENOP, KIF6] | |
| ToppCell | Oligodendrocyte | 0.000032 | 190 | 15 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Somatosensory_Cortex_(S1)-Non-neuronal-Macrogl... | 1.578947 | [SH3TC2, SELENOP, MBOAT1] | |
| ToppCell | Excitatory | 0.000036 | 197 | 15 | 3 | Comparison of human cortex and organoids | Neuron-Postmitotic-Excitatory_Neuron_-PFC|Worl... | 1.522843 | [SHTN1, YWHAQ, APP] | |
| ToppCell | 3.5_mon|organoid_Paulsen_bioRxiv | 0.000036 | 197 | 15 | 3 | Integration of Four Brain Organoid Datasets an... | Brain_organoid-organoid_Paulsen_bioRxiv-3.5_mo... | 1.522843 | [SHTN1, JOSD2, TSC22D3] | |
| ToppCell | Excitatory | 0.000036 | 197 | 15 | 3 | Comparison of human cortex and organoids | Neuron-Postmitotic-Excitatory_Neuron_-Upper_La... | 1.522843 | [SHTN1, YWHAQ, APP] | |
| ToppCell | Excitatory | 0.000036 | 197 | 15 | 3 | Comparison of human cortex and organoids | Neuron-Postmitotic-Excitatory_Neuron_-Upper_La... | 1.522843 | [SHTN1, YWHAQ, APP] | |
| ToppCell | Excitatory | 0.000036 | 197 | 15 | 3 | Comparison of human cortex and organoids | Neuron-Postmitotic-Excitatory_Neuron_-PFC-29|W... | 1.522843 | [SHTN1, YWHAQ, APP] | |
| ToppCell | Oligodendrocyte | 0.000038 | 200 | 15 | 3 | Human Adult Brain MTG | Macroglial-Oligodendrocytes|Macroglial / cells... | 1.5 | [SHTN1, SH3TC2, KIF6] | |
| ToppCell | Oligodendrocyte | 0.000038 | 200 | 15 | 3 | Human Adult Brain MTG | Macroglial-Oligodendrocytes-OPALIN---|Macrogli... | 1.5 | [SHTN1, SH3TC2, KIF6] | |
| ToppCell | Oligodendrocyte | 0.000038 | 200 | 15 | 3 | Human Adult Brain MTG | Macroglial-Oligodendrocytes-OPALIN-|Macroglial... | 1.5 | [SHTN1, SH3TC2, KIF6] | |
| ToppCell | Oligodendrocyte | 0.000038 | 200 | 15 | 3 | Human Adult Brain MTG | Macroglial-Oligodendrocytes-OPALIN----L1-6|Mac... | 1.5 | [SHTN1, SH3TC2, KIF6] | |
| ToppCell | Oligodendrocyte | 0.000038 | 200 | 15 | 3 | Human Adult Brain MTG | Macroglial-Oligodendrocytes-OPALIN--|Macroglia... | 1.5 | [SHTN1, SH3TC2, KIF6] | |
| ToppCell | Oligodendrocyte | 0.000038 | 200 | 15 | 3 | Human Adult Brain MTG | Macroglial-Oligodendrocytes-OPALIN|Macroglial ... | 1.5 | [SHTN1, SH3TC2, KIF6] | |
| ToppCell | Oligodendrocyte | 0.000465 | 96 | 15 | 2 | Mouse Adult Brain Overview (690k cells, 9 regi... | Cerebellum-Macroglia-OLIGODENDROCYTE-O2-Trf-Ol... | 2.083333 | [KIF6, KCNK13] | |
| ToppCell | Oligodendrocyte | 0.000465 | 96 | 15 | 2 | Mouse Adult Brain Overview (690k cells, 9 regi... | Cerebellum-Macroglia-OLIGODENDROCYTE-O2-Trf|Ce... | 2.083333 | [KIF6, KCNK13] | |
| ToppCell | Oligodendrocyte | 0.000465 | 96 | 15 | 2 | Mouse Adult Brain Overview (690k cells, 9 regi... | Cerebellum-Macroglia-OLIGODENDROCYTE-O2|Cerebe... | 2.083333 | [KIF6, KCNK13] | |
| ToppCell | Oligodendrocyte | 0.000465 | 96 | 15 | 2 | Mouse Adult Brain Overview (690k cells, 9 regi... | Cerebellum-Macroglia-OLIGODENDROCYTE-O2-Trf-Ol... | 2.083333 | [KIF6, KCNK13] | |
| ToppCell | OPC | 0.00062 | 111 | 15 | 2 | Mouse Adult Brain Overview (690k cells, 9 regi... | Hippocampus-Macroglia-POLYDENDROCYTE-P1|Hippoc... | 1.801802 | [JOSD2, SH3TC2] | |
| ToppCell | Fetal_brain-fetalBrain_Zhong_nature-GW23|GW23 | 0.000631 | 112 | 15 | 2 | Integration of Four Brain Organoid Datasets an... | Fetal_brain-fetalBrain_Zhong_nature-GW23-Neuro... | 1.785714 | [SHTN1, YWHAQ] | |
| ToppCell | Oligodendrocyte | 0.000665 | 115 | 15 | 2 | Mouse Adult Brain Overview (690k cells, 9 regi... | Cerebellum-Macroglia-OLIGODENDROCYTE|Cerebellu... | 1.73913 | [KIF6, KCNK13] | |
| 3 | ToppCell | Excitatory | 0.0 | 200 | 13 | 4 | Human Adult Brain MTG | Neuronal-Excitatory-eC(RORB)|Neuronal / cells ... | 2.0 | [PTPRK, RORA, ESR1, PHLDB2] |
| ToppCell | Excitatory | 0.000015 | 170 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Anterior_Cingulate_gyrus_(CgG)-Neuronal-Glutam... | 1.764706 | [PTPRK, PHLDB2, KCNH8] | |
| ToppCell | Excitatory | 0.000016 | 173 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Mid-temporal_gyrus_(MTG)-Neuronal-Glutamatergi... | 1.734104 | [PTPRK, UTRN, PHLDB2] | |
| ToppCell | Excitatory | 0.000016 | 174 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Somatosensory_Cortex_(S1)-Neuronal-Glutamaterg... | 1.724138 | [PTPRK, UTRN, PHLDB2] | |
| ToppCell | Excitatory | 0.000016 | 176 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | primary_auditory_cortex_(A1C)-Neuronal-Glutama... | 1.704545 | [PTPRK, UTRN, PHLDB2] | |
| ToppCell | Excitatory | 0.000017 | 178 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Mid-temporal_gyrus_(MTG)-Neuronal-Glutamatergi... | 1.685393 | [PTPRK, RORA, ESR1] | |
| ToppCell | Excitatory | 0.000017 | 179 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | primary_auditory_cortex_(A1C)-Neuronal-Glutama... | 1.675978 | [UTRN, RORA, PHLDB2] | |
| ToppCell | Excitatory | 0.000017 | 180 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | primary_auditory_cortex_(A1C)-Neuronal-Glutama... | 1.666667 | [UTRN, PHLDB2, KCNH8] | |
| ToppCell | Excitatory | 0.000017 | 180 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Mid-temporal_gyrus_(MTG)-Neuronal-Glutamatergi... | 1.666667 | [UTRN, RORA, PHLDB2] | |
| ToppCell | Inhibitory | 0.000018 | 183 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Visual_cortex_(V1C)-Neuronal-Inh_GABAe... | 1.639344 | [CPED1, SDK1, GREB1] | |
| ToppCell | Excitatory | 0.000018 | 183 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Mid-temporal_gyrus_(MTG)-Neuronal-Glutamatergi... | 1.639344 | [PTPRK, RORA, ESR1] | |
| ToppCell | Excitatory | 0.000019 | 185 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Somatosensory_Cortex_(S1)-Neuronal-Glutamaterg... | 1.621622 | [UTRN, PHLDB2, KCNH8] | |
| ToppCell | Mid-temporal_gyrus_(MTG)-Non-neuronal-Macrogli... | 0.00002 | 188 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Mid-temporal_gyrus_(MTG)-Non-neuronal-Macrogli... | 1.595745 | [PARD3B, ZBTB20, KCNH8] | |
| ToppCell | Inhibitory | 0.00002 | 188 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Motor_Cortex_(M1)-Neuronal-Inh_GABAerg... | 1.595745 | [CPED1, SDK1, GREB1] | |
| ToppCell | Somatosensory_Cortex_(S1)-Non-neuronal|Somatos... | 0.00002 | 189 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Somatosensory_Cortex_(S1)-Non-neuronal|Somatos... | 1.587302 | [PARD3B, ZBTB20, KCNH8] | |
| ToppCell | primary_auditory_cortex_(A1C)-Non-neuronal-Mac... | 0.00002 | 189 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | primary_auditory_cortex_(A1C)-Non-neuronal-Mac... | 1.587302 | [PARD3B, ZBTB20, KCNH8] | |
| ToppCell | primary_auditory_cortex_(A1C)-Non-neuronal|pri... | 0.00002 | 189 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | primary_auditory_cortex_(A1C)-Non-neuronal|pri... | 1.587302 | [PARD3B, ZBTB20, KCNH8] | |
| ToppCell | Inhibitory | 0.000021 | 190 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Somatosensory_Cortex_(S1)-Neuronal-Inh_GABAerg... | 1.578947 | [CPED1, SDK1, GREB1] | |
| ToppCell | Somatosensory_Cortex_(S1)-Non-neuronal-Macrogl... | 0.000021 | 190 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Somatosensory_Cortex_(S1)-Non-neuronal-Macrogl... | 1.578947 | [PARD3B, ZBTB20, KCNH8] | |
| ToppCell | Excitatory | 0.000021 | 190 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Visual_cortex_(V1C)-Neuronal-Glutamate... | 1.578947 | [PTPRK, RORA, KCNH8] | |
| ToppCell | Anterior_Cingulate_gyrus_(CgG)-Non-neuronal-Ma... | 0.000021 | 190 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Anterior_Cingulate_gyrus_(CgG)-Non-neuronal-Ma... | 1.578947 | [PARD3B, ZBTB20, KCNH8] | |
| ToppCell | Primary_Motor_Cortex_(M1)-Non-neuronal-Macrogl... | 0.000021 | 190 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Motor_Cortex_(M1)-Non-neuronal-Macrogl... | 1.578947 | [PARD3B, ZBTB20, KCNH8] | |
| ToppCell | Primary_Motor_Cortex_(M1)-Non-neuronal|Primary... | 0.000021 | 191 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Motor_Cortex_(M1)-Non-neuronal|Primary... | 1.570681 | [PARD3B, ZBTB20, KCNH8] | |
| ToppCell | Anterior_Cingulate_gyrus_(CgG)-Non-neuronal|An... | 0.000021 | 191 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Anterior_Cingulate_gyrus_(CgG)-Non-neuronal|An... | 1.570681 | [PARD3B, ZBTB20, KCNH8] | |
| ToppCell | Mid-temporal_gyrus_(MTG)-Non-neuronal|Mid-temp... | 0.000021 | 191 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Mid-temporal_gyrus_(MTG)-Non-neuronal|Mid-temp... | 1.570681 | [PARD3B, ZBTB20, KCNH8] | |
| ToppCell | Excitatory | 0.000021 | 192 | 13 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Visual_cortex_(V1C)-Neuronal-Glutamate... | 1.5625 | [PTPRK, RORA, KCNH8] | |
| 4 | ToppCell | primary_auditory_cortex_(A1C)-Non-neuronal-Mac... | 0.0 | 189 | 15 | 5 | Human Adult Multiple Cortical Areas SMART-seq | primary_auditory_cortex_(A1C)-Non-neuronal-Mac... | 2.645503 | [FOLH1, SELENOP, HIF3A, ALDH1L1, STON2] |
| ToppCell | Astrocyte | 0.0 | 200 | 15 | 5 | Human Adult Brain MTG | Macroglial-Astrocytes-SLC14A1|Macroglial / cel... | 2.5 | [FMN2, HIF3A, PSD2, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.0 | 200 | 15 | 5 | Human Adult Brain MTG | Macroglial-Astrocytes|Macroglial / cells hiera... | 2.5 | [FMN2, HIF3A, PSD2, ALDH1L1, STON2] | |
| ToppCell | Macroglial|World / | 0.0 | 200 | 15 | 5 | Human Adult Brain MTG | Macroglial|World / cells hierarchy compared to... | 2.5 | [FOLH1, PRRX1, HIF3A, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.0 | 200 | 15 | 5 | Human Adult Brain MTG | Macroglial-Astrocytes-SLC14A1---|Macroglial / ... | 2.5 | [FMN2, HIF3A, PSD2, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.0 | 200 | 15 | 5 | Human Adult Brain MTG | Macroglial-Astrocytes-SLC14A1----L1-6|Macrogli... | 2.5 | [FMN2, HIF3A, PSD2, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.0 | 200 | 15 | 5 | Human Adult Brain MTG | Macroglial-Astrocytes-SLC14A1--|Macroglial / c... | 2.5 | [FMN2, HIF3A, PSD2, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.0 | 200 | 15 | 5 | Human Adult Brain MTG | Macroglial-Astrocytes-SLC14A1-|Macroglial / ce... | 2.5 | [FMN2, HIF3A, PSD2, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.0 | 186 | 15 | 4 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Visual_cortex_(V1C)-Non-neuronal-Macro... | 2.150538 | [HIF3A, PSD2, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.0 | 187 | 15 | 4 | Human Adult Multiple Cortical Areas SMART-seq | primary_auditory_cortex_(A1C)-Non-neuronal-Mac... | 2.139037 | [PRRX1, HIF3A, PSD2, STON2] | |
| ToppCell | Astrocyte | 0.0 | 187 | 15 | 4 | Human Adult Multiple Cortical Areas SMART-seq | Mid-temporal_gyrus_(MTG)-Non-neuronal-Macrogli... | 2.139037 | [HIF3A, PSD2, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.0 | 187 | 15 | 4 | Human Adult Multiple Cortical Areas SMART-seq | Mid-temporal_gyrus_(MTG)-Non-neuronal-Macrogli... | 2.139037 | [HIF3A, PSD2, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.0 | 187 | 15 | 4 | Human Adult Multiple Cortical Areas SMART-seq | Anterior_Cingulate_gyrus_(CgG)-Non-neuronal-Ma... | 2.139037 | [PRRX1, HIF3A, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.0 | 187 | 15 | 4 | Human Adult Multiple Cortical Areas SMART-seq | Mid-temporal_gyrus_(MTG)-Non-neuronal-Macrogli... | 2.139037 | [HIF3A, PSD2, ALDH1L1, STON2] | |
| ToppCell | Mid-temporal_gyrus_(MTG)-Non-neuronal-Macrogli... | 0.0 | 188 | 15 | 4 | Human Adult Multiple Cortical Areas SMART-seq | Mid-temporal_gyrus_(MTG)-Non-neuronal-Macrogli... | 2.12766 | [FOLH1, HIF3A, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.0 | 190 | 15 | 4 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Visual_cortex_(V1C)-Non-neuronal-Macro... | 2.105263 | [HIF3A, PSD2, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.0 | 190 | 15 | 4 | Human Adult Multiple Cortical Areas SMART-seq | primary_auditory_cortex_(A1C)-Non-neuronal-Mac... | 2.105263 | [HIF3A, PSD2, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.0 | 190 | 15 | 4 | Human Adult Multiple Cortical Areas SMART-seq | Somatosensory_Cortex_(S1)-Non-neuronal-Macrogl... | 2.105263 | [HIF3A, PSD2, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.0 | 190 | 15 | 4 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Visual_cortex_(V1C)-Non-neuronal-Macro... | 2.105263 | [HIF3A, PSD2, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.0 | 190 | 15 | 4 | Human Adult Multiple Cortical Areas SMART-seq | Somatosensory_Cortex_(S1)-Non-neuronal-Macrogl... | 2.105263 | [HIF3A, PSD2, ALDH1L1, STON2] | |
| ToppCell | Anterior_Cingulate_gyrus_(CgG)-Non-neuronal-Ma... | 0.0 | 190 | 15 | 4 | Human Adult Multiple Cortical Areas SMART-seq | Anterior_Cingulate_gyrus_(CgG)-Non-neuronal-Ma... | 2.105263 | [FOLH1, HIF3A, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.0 | 190 | 15 | 4 | Human Adult Multiple Cortical Areas SMART-seq | Somatosensory_Cortex_(S1)-Non-neuronal-Macrogl... | 2.105263 | [HIF3A, PSD2, ALDH1L1, STON2] | |
| ToppCell | Primary_Motor_Cortex_(M1)-Non-neuronal|Primary... | 0.0 | 191 | 15 | 4 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Motor_Cortex_(M1)-Non-neuronal|Primary... | 2.094241 | [FOLH1, SELENOP, HIF3A, STON2] | |
| ToppCell | Anterior_Cingulate_gyrus_(CgG)-Non-neuronal|An... | 0.0 | 191 | 15 | 4 | Human Adult Multiple Cortical Areas SMART-seq | Anterior_Cingulate_gyrus_(CgG)-Non-neuronal|An... | 2.094241 | [FOLH1, HIF3A, ALDH1L1, STON2] | |
| ToppCell | Mid-temporal_gyrus_(MTG)-Non-neuronal|Mid-temp... | 0.0 | 191 | 15 | 4 | Human Adult Multiple Cortical Areas SMART-seq | Mid-temporal_gyrus_(MTG)-Non-neuronal|Mid-temp... | 2.094241 | [FOLH1, HIF3A, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.0 | 192 | 15 | 4 | Human Adult Multiple Cortical Areas SMART-seq | primary_auditory_cortex_(A1C)-Non-neuronal-Mac... | 2.083333 | [HIF3A, PSD2, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.0 | 192 | 15 | 4 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Motor_Cortex_(M1)-Non-neuronal-Macrogl... | 2.083333 | [HIF3A, PSD2, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.0 | 192 | 15 | 4 | Human Adult Multiple Cortical Areas SMART-seq | primary_auditory_cortex_(A1C)-Non-neuronal-Mac... | 2.083333 | [HIF3A, PSD2, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.0 | 193 | 15 | 4 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Motor_Cortex_(M1)-Non-neuronal-Macrogl... | 2.072539 | [HIF3A, PSD2, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.0 | 193 | 15 | 4 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Motor_Cortex_(M1)-Non-neuronal-Macrogl... | 2.072539 | [HIF3A, PSD2, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.000024 | 172 | 15 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Motor_Cortex_(M1)-Non-neuronal-Macrogl... | 1.744186 | [HIF3A, PSD2, STON2] | |
| ToppCell | Astrocyte | 0.000025 | 174 | 15 | 3 | Human Adult Multiple Cortical Areas SMART-seq | primary_auditory_cortex_(A1C)-Non-neuronal-Mac... | 1.724138 | [PRRX1, HIF3A, STON2] | |
| ToppCell | Posterior_cortex-Macroglia|Posterior_cortex | 0.000025 | 175 | 15 | 3 | Mouse Adult Brain Overview (690k cells, 9 regi... | Posterior_cortex-Macroglia|Posterior_cortex / ... | 1.714286 | [FOLH1, ADHFE1, STON2] | |
| ToppCell | Astrocyte | 0.000028 | 181 | 15 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Somatosensory_Cortex_(S1)-Non-neuronal-Macrogl... | 1.657459 | [HIF3A, PSD2, STON2] | |
| ToppCell | Astrocyte | 0.00003 | 184 | 15 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Motor_Cortex_(M1)-Non-neuronal-Macrogl... | 1.630435 | [HIF3A, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.00003 | 186 | 15 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Mid-temporal_gyrus_(MTG)-Non-neuronal-Macrogli... | 1.612903 | [HIF3A, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.000031 | 187 | 15 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Anterior_Cingulate_gyrus_(CgG)-Non-neuronal-Ma... | 1.604278 | [HIF3A, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.000031 | 187 | 15 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Anterior_Cingulate_gyrus_(CgG)-Non-neuronal-Ma... | 1.604278 | [HIF3A, ALDH1L1, STON2] | |
| ToppCell | Astrocyte | 0.000031 | 187 | 15 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Anterior_Cingulate_gyrus_(CgG)-Non-neuronal-Ma... | 1.604278 | [HIF3A, ALDH1L1, STON2] | |
| ToppCell | Somatosensory_Cortex_(S1)-Non-neuronal|Somatos... | 0.000032 | 189 | 15 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Somatosensory_Cortex_(S1)-Non-neuronal|Somatos... | 1.587302 | [FOLH1, HIF3A, STON2] | |
| ToppCell | primary_auditory_cortex_(A1C)-Non-neuronal|pri... | 0.000032 | 189 | 15 | 3 | Human Adult Multiple Cortical Areas SMART-seq | primary_auditory_cortex_(A1C)-Non-neuronal|pri... | 1.587302 | [FOLH1, HIF3A, STON2] | |
| ToppCell | Somatosensory_Cortex_(S1)-Non-neuronal-Macrogl... | 0.000032 | 190 | 15 | 3 | Human Adult Multiple Cortical Areas SMART-seq | Somatosensory_Cortex_(S1)-Non-neuronal-Macrogl... | 1.578947 | [FOLH1, SELENOP, HIF3A] | |
| 6 | ToppCell | Neuronal|GW08 | 0.000152 | 59 | 14 | 2 | Integration of Four Brain Organoid Datasets an... | Fetal_brain-organoid_Tanaka_cellReport-GW08-Ne... | 3.389831 | [PTEN, NCDN] |
| ToppCell | Fetal_brain-fetalBrain_Zhong_nature-GW08|GW08 | 0.00028 | 80 | 14 | 2 | Integration of Four Brain Organoid Datasets an... | Fetal_brain-fetalBrain_Zhong_nature-GW08-Neuro... | 2.5 | [PTEN, NCDN] | |
| ToppCell | Neuronal-ventral_progenitors_and_neurons_1|Org... | 0.001182 | 165 | 14 | 2 | Integration of Four Brain Organoid Datasets an... | Brain_organoid-organoid_Kanton_Nature-Organoid... | 1.212121 | [NCDN, AGAP2] | |
| ToppCell | Excitatory | 0.001225 | 168 | 14 | 2 | Mouse Adult Brain Overview (690k cells, 9 regi... | Hippocampus-Neuronal-Excitatory-eN1(Slc17a7)|H... | 1.190476 | [FKBP1A, NCDN] | |
| ToppCell | Excitatory | 0.001545 | 189 | 14 | 2 | Mouse Adult Brain Overview (690k cells, 9 regi... | Hippocampus-Neuronal-Excitatory|Hippocampus / ... | 1.058201 | [NCDN, AGAP2] | |
| 7 | ToppCell | OPC | 0.0 | 189 | 13 | 8 | Comparison of human cortex and organoids | Non-neuronal-Non-dividing-OPC|World / Primary ... | 4.232804 | [SOX6, PDGFRA, PTPRZ1, SGCD, GRID2, LHFPL3, EP... |
| ToppCell | OPC | 0.0 | 189 | 13 | 8 | Comparison of human cortex and organoids | Non-neuronal-Non-dividing-OPC-OPC|World / Prim... | 4.232804 | [SOX6, PDGFRA, PTPRZ1, SGCD, GRID2, LHFPL3, EP... | |
| ToppCell | OPC | 0.0 | 189 | 13 | 8 | Comparison of human cortex and organoids | Non-neuronal-Non-dividing-OPC-OPC-30|World / P... | 4.232804 | [SOX6, PDGFRA, PTPRZ1, SGCD, GRID2, LHFPL3, EP... | |
| ToppCell | OPC | 0.0 | 187 | 13 | 7 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Visual_cortex_(V1C)-Non-neuronal-Macro... | 3.743316 | [SOX6, PDGFRA, PTPRZ1, GRID2, MEGF11, LHFPL3, ... | |
| ToppCell | OPC | 0.0 | 187 | 13 | 7 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Visual_cortex_(V1C)-Non-neuronal-Macro... | 3.743316 | [SOX6, PDGFRA, PTPRZ1, GRID2, MEGF11, LHFPL3, ... | |
| ToppCell | OPC | 0.0 | 200 | 13 | 7 | Human Adult Brain MTG | Macroglial-Polydendrocytes-PDGFRA----L1-6|Macr... | 3.5 | [SOX6, PDGFRA, PTPRZ1, GRID2, MEGF11, LHFPL3, ... | |
| ToppCell | OPC | 0.0 | 200 | 13 | 7 | Human Adult Brain MTG | Macroglial-Polydendrocytes-PDGFRA|Macroglial /... | 3.5 | [SOX6, PDGFRA, PTPRZ1, GRID2, MEGF11, LHFPL3, ... | |
| ToppCell | OPC | 0.0 | 200 | 13 | 7 | Human Adult Brain MTG | Macroglial-Polydendrocytes-PDGFRA---|Macroglia... | 3.5 | [SOX6, PDGFRA, PTPRZ1, GRID2, MEGF11, LHFPL3, ... | |
| ToppCell | OPC | 0.0 | 200 | 13 | 7 | Human Adult Brain MTG | Macroglial-Polydendrocytes-PDGFRA-|Macroglial ... | 3.5 | [SOX6, PDGFRA, PTPRZ1, GRID2, MEGF11, LHFPL3, ... | |
| ToppCell | OPC | 0.0 | 200 | 13 | 7 | Human Adult Brain MTG | Macroglial-Polydendrocytes-PDGFRA--|Macroglial... | 3.5 | [SOX6, PDGFRA, PTPRZ1, GRID2, MEGF11, LHFPL3, ... | |
| ToppCell | OPC | 0.0 | 200 | 13 | 7 | Human Adult Brain MTG | Macroglial-Polydendrocytes|Macroglial / cells ... | 3.5 | [SOX6, PDGFRA, PTPRZ1, GRID2, MEGF11, LHFPL3, ... | |
| ToppCell | OPC | 0.0 | 182 | 13 | 6 | Human Adult Multiple Cortical Areas SMART-seq | Anterior_Cingulate_gyrus_(CgG)-Non-neuronal-Ma... | 3.296703 | [SOX6, PDGFRA, PTPRZ1, MEGF11, LHFPL3, EPN2] | |
| ToppCell | OPC | 0.0 | 182 | 13 | 6 | Human Adult Multiple Cortical Areas SMART-seq | Somatosensory_Cortex_(S1)-Non-neuronal-Macrogl... | 3.296703 | [SOX6, PDGFRA, PTPRZ1, MEGF11, LHFPL3, EPN2] | |
| ToppCell | OPC | 0.0 | 182 | 13 | 6 | Human Adult Multiple Cortical Areas SMART-seq | Anterior_Cingulate_gyrus_(CgG)-Non-neuronal-Ma... | 3.296703 | [SOX6, PDGFRA, PTPRZ1, MEGF11, LHFPL3, EPN2] | |
| ToppCell | OPC | 0.0 | 182 | 13 | 6 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Motor_Cortex_(M1)-Non-neuronal-Macrogl... | 3.296703 | [SOX6, PDGFRA, PTPRZ1, MEGF11, LHFPL3, EPN2] | |
| ToppCell | OPC | 0.0 | 182 | 13 | 6 | Human Adult Multiple Cortical Areas SMART-seq | Somatosensory_Cortex_(S1)-Non-neuronal-Macrogl... | 3.296703 | [SOX6, PDGFRA, PTPRZ1, MEGF11, LHFPL3, EPN2] | |
| ToppCell | OPC | 0.0 | 182 | 13 | 6 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Motor_Cortex_(M1)-Non-neuronal-Macrogl... | 3.296703 | [SOX6, PDGFRA, PTPRZ1, MEGF11, LHFPL3, EPN2] | |
| ToppCell | OPC | 0.0 | 185 | 13 | 6 | Human Adult Multiple Cortical Areas SMART-seq | primary_auditory_cortex_(A1C)-Non-neuronal-Mac... | 3.243243 | [SOX6, PDGFRA, PTPRZ1, MEGF11, LHFPL3, EPN2] | |
| ToppCell | OPC | 0.0 | 185 | 13 | 6 | Human Adult Multiple Cortical Areas SMART-seq | primary_auditory_cortex_(A1C)-Non-neuronal-Mac... | 3.243243 | [SOX6, PDGFRA, PTPRZ1, MEGF11, LHFPL3, EPN2] | |
| ToppCell | OPC | 0.0 | 185 | 13 | 6 | Human Adult Multiple Cortical Areas SMART-seq | Mid-temporal_gyrus_(MTG)-Non-neuronal-Macrogli... | 3.243243 | [SOX6, PDGFRA, PTPRZ1, MEGF11, LHFPL3, EPN2] | |
| ToppCell | OPC | 0.0 | 185 | 13 | 6 | Human Adult Multiple Cortical Areas SMART-seq | Mid-temporal_gyrus_(MTG)-Non-neuronal-Macrogli... | 3.243243 | [SOX6, PDGFRA, PTPRZ1, MEGF11, LHFPL3, EPN2] | |
| ToppCell | OPC | 0.0 | 192 | 13 | 5 | Integration of Four Brain Organoid Datasets an... | Fetal_brain-fetalBrain_Zhong_nature-GW16-OPC_r... | 2.604167 | [SOX6, PDGFRA, SGCD, GRID2, LHFPL3] | |
| ToppCell | OPC | 0.0 | 192 | 13 | 5 | Integration of Four Brain Organoid Datasets an... | Fetal_brain-fetalBrain_Zhong_nature-GW16-OPC_r... | 2.604167 | [SOX6, PDGFRA, SGCD, GRID2, LHFPL3] | |
| ToppCell | OPC | 0.0 | 200 | 13 | 5 | Integration of Four Brain Organoid Datasets an... | Fetal_brain-fetalBrain_Zhong_nature-GW26-OPC_r... | 2.5 | [PDGFRA, PTPRZ1, GRID2, LHFPL3, EPN2] | |
| ToppCell | OPC | 0.0 | 200 | 13 | 5 | Integration of Four Brain Organoid Datasets an... | Fetal_brain-fetalBrain_Zhong_nature-GW26-OPC_r... | 2.5 | [PDGFRA, PTPRZ1, GRID2, LHFPL3, EPN2] | |
| ToppCell | Astrocyte | 0.0 | 200 | 13 | 5 | Integration of Four Brain Organoid Datasets an... | Fetal_brain-organoid_Tanaka_cellReport-GW16-Ma... | 2.5 | [PDGFRA, PTPRZ1, GRID2, LHFPL3, SLC35F1] | |
| 8 | ToppCell | Microglial | 0.0 | 200 | 12 | 8 | Human Adult Brain MTG | Hematolymphoid-Microglia|Hematolymphoid / cell... | 4.0 | [DOCK2, INPP5D, LRMDA, SRGAP2, DOCK8, LY86, ME... |
| ToppCell | Microglial | 0.0 | 200 | 12 | 8 | Human Adult Brain MTG | Hematolymphoid-Microglia-TYROBP----L1-3|Hemato... | 4.0 | [DOCK2, INPP5D, LRMDA, SRGAP2, DOCK8, LY86, ME... | |
| ToppCell | Microglial | 0.0 | 200 | 12 | 8 | Human Adult Brain MTG | Hematolymphoid-Microglia-TYROBP--|Hematolympho... | 4.0 | [DOCK2, INPP5D, LRMDA, SRGAP2, DOCK8, LY86, ME... | |
| ToppCell | Microglial | 0.0 | 200 | 12 | 8 | Human Adult Brain MTG | Hematolymphoid-Microglia-TYROBP---|Hematolymph... | 4.0 | [DOCK2, INPP5D, LRMDA, SRGAP2, DOCK8, LY86, ME... | |
| ToppCell | Microglial | 0.0 | 200 | 12 | 8 | Human Adult Brain MTG | Hematolymphoid-Microglia-TYROBP-|Hematolymphoi... | 4.0 | [DOCK2, INPP5D, LRMDA, SRGAP2, DOCK8, LY86, ME... | |
| ToppCell | Microglial | 0.0 | 200 | 12 | 8 | Human Adult Brain MTG | Hematolymphoid-Microglia-TYROBP|Hematolymphoid... | 4.0 | [DOCK2, INPP5D, LRMDA, SRGAP2, DOCK8, LY86, ME... | |
| ToppCell | Microglial | Endothelial | 0.0 | 187 | 12 | 7 | Human Adult Multiple Cortical Areas SMART-seq | primary_auditory_cortex_(A1C)-Non-neuronal-End... | 3.743316 | [DOCK2, INPP5D, LRMDA, SRGAP2, DOCK8, GPR34, L... | |
| ToppCell | Microglial | Endothelial | 0.0 | 187 | 12 | 7 | Human Adult Multiple Cortical Areas SMART-seq | primary_auditory_cortex_(A1C)-Non-neuronal-End... | 3.743316 | [DOCK2, INPP5D, LRMDA, SRGAP2, DOCK8, GPR34, L... | |
| ToppCell | Microglial | Endothelial | 0.0 | 187 | 12 | 7 | Human Adult Multiple Cortical Areas SMART-seq | primary_auditory_cortex_(A1C)-Non-neuronal-End... | 3.743316 | [DOCK2, INPP5D, LRMDA, SRGAP2, DOCK8, GPR34, L... | |
| ToppCell | Microglial | Endothelial | 0.0 | 188 | 12 | 7 | Human Adult Multiple Cortical Areas SMART-seq | Anterior_Cingulate_gyrus_(CgG)-Non-neuronal-En... | 3.723404 | [DOCK2, INPP5D, LRMDA, DOCK8, GPR34, LY86, MERTK] | |
| ToppCell | Microglial | Endothelial | 0.0 | 188 | 12 | 7 | Human Adult Multiple Cortical Areas SMART-seq | Anterior_Cingulate_gyrus_(CgG)-Non-neuronal-En... | 3.723404 | [DOCK2, INPP5D, LRMDA, DOCK8, GPR34, LY86, MERTK] | |
| ToppCell | Endothelial | 0.0 | 188 | 12 | 7 | Human Adult Multiple Cortical Areas SMART-seq | Anterior_Cingulate_gyrus_(CgG)-Non-neuronal-En... | 3.723404 | [DOCK2, INPP5D, LRMDA, DOCK8, GPR34, LY86, MERTK] | |
| ToppCell | Microglial | Endothelial | 0.0 | 188 | 12 | 7 | Human Adult Multiple Cortical Areas SMART-seq | Anterior_Cingulate_gyrus_(CgG)-Non-neuronal-En... | 3.723404 | [DOCK2, INPP5D, LRMDA, DOCK8, GPR34, LY86, MERTK] | |
| ToppCell | Endothelial | 0.0 | 190 | 12 | 7 | Human Adult Multiple Cortical Areas SMART-seq | Somatosensory_Cortex_(S1)-Non-neuronal-Endothe... | 3.684211 | [DOCK2, INPP5D, LRMDA, SRGAP2, DOCK8, GPR34, L... | |
| ToppCell | Microglial | 0.0 | 199 | 12 | 7 | Integration of Four Brain Organoid Datasets an... | Fetal_brain-fetalBrain_Zhong_nature-GW23-Myelo... | 3.517588 | [DOCK2, INPP5D, LRMDA, DOCK8, GPR34, LY86, MERTK] | |
| ToppCell | Myeloid|GW23 | 0.0 | 199 | 12 | 7 | Integration of Four Brain Organoid Datasets an... | Fetal_brain-fetalBrain_Zhong_nature-GW23-Myelo... | 3.517588 | [DOCK2, INPP5D, LRMDA, DOCK8, GPR34, LY86, MERTK] | |
| ToppCell | Microglial | 0.0 | 179 | 12 | 6 | Mouse Adult Brain Overview (690k cells, 9 regi... | Posterior_cortex-Hematopoietic-MICROGLIA|Poste... | 3.351955 | [DOCK2, HEXB, INPP5D, TGFBR1, GPR34, LY86] | |
| ToppCell | Microglial | Endothelial | 0.0 | 184 | 12 | 6 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Visual_cortex_(V1C)-Non-neuronal-Endot... | 3.26087 | [DOCK2, INPP5D, LRMDA, DOCK8, GPR34, LY86] | |
| ToppCell | Microglial | Endothelial | 0.0 | 184 | 12 | 6 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Visual_cortex_(V1C)-Non-neuronal-Endot... | 3.26087 | [DOCK2, INPP5D, LRMDA, DOCK8, GPR34, LY86] | |
| ToppCell | Microglial | Endothelial | 0.0 | 184 | 12 | 6 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Visual_cortex_(V1C)-Non-neuronal-Endot... | 3.26087 | [DOCK2, INPP5D, LRMDA, DOCK8, GPR34, LY86] | |
| ToppCell | Endothelial | 0.0 | 185 | 12 | 6 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Visual_cortex_(V1C)-Non-neuronal-Endot... | 3.243243 | [DOCK2, INPP5D, LRMDA, DOCK8, GPR34, LY86] | |
| ToppCell | Microglial | Endothelial | 0.0 | 186 | 12 | 6 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Motor_Cortex_(M1)-Non-neuronal-Endothe... | 3.225806 | [DOCK2, INPP5D, LRMDA, DOCK8, GPR34, LY86] | |
| ToppCell | Microglial | Endothelial | 0.0 | 186 | 12 | 6 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Motor_Cortex_(M1)-Non-neuronal-Endothe... | 3.225806 | [DOCK2, INPP5D, LRMDA, DOCK8, GPR34, LY86] | |
| ToppCell | Microglial | Endothelial | 0.0 | 186 | 12 | 6 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Motor_Cortex_(M1)-Non-neuronal-Endothe... | 3.225806 | [DOCK2, INPP5D, LRMDA, DOCK8, GPR34, LY86] | |
| ToppCell | Endothelial | 0.0 | 187 | 12 | 6 | Human Adult Multiple Cortical Areas SMART-seq | Primary_Motor_Cortex_(M1)-Non-neuronal-Endothe... | 3.208556 | [DOCK2, INPP5D, LRMDA, DOCK8, GPR34, LY86] | |
| ToppCell | Endothelial | 0.0 | 187 | 12 | 6 | Human Adult Multiple Cortical Areas SMART-seq | primary_auditory_cortex_(A1C)-Non-neuronal-End... | 3.208556 | [DOCK2, INPP5D, LRMDA, DOCK8, GPR34, LY86] | |
| ToppCell | Microglial | Endothelial | 0.0 | 188 | 12 | 6 | Human Adult Multiple Cortical Areas SMART-seq | Mid-temporal_gyrus_(MTG)-Non-neuronal-Endothel... | 3.191489 | [DOCK2, INPP5D, LRMDA, DOCK8, GPR34, LY86] | |
| ToppCell | Microglial | Endothelial | 0.0 | 188 | 12 | 6 | Human Adult Multiple Cortical Areas SMART-seq | Mid-temporal_gyrus_(MTG)-Non-neuronal-Endothel... | 3.191489 | [DOCK2, INPP5D, LRMDA, DOCK8, GPR34, LY86] | |
| ToppCell | Microglial | Endothelial | 0.0 | 188 | 12 | 6 | Human Adult Multiple Cortical Areas SMART-seq | Mid-temporal_gyrus_(MTG)-Non-neuronal-Endothel... | 3.191489 | [DOCK2, INPP5D, LRMDA, DOCK8, GPR34, LY86] | |
| ToppCell | Microglial | Endothelial | 0.0 | 189 | 12 | 6 | Human Adult Multiple Cortical Areas SMART-seq | Somatosensory_Cortex_(S1)-Non-neuronal-Endothe... | 3.174603 | [DOCK2, INPP5D, LRMDA, DOCK8, GPR34, LY86] | |
| ToppCell | Endothelial | 0.0 | 189 | 12 | 6 | Human Adult Multiple Cortical Areas SMART-seq | Mid-temporal_gyrus_(MTG)-Non-neuronal-Endothel... | 3.174603 | [DOCK2, INPP5D, LRMDA, DOCK8, GPR34, LY86] | |
| ToppCell | Microglial | Endothelial | 0.0 | 189 | 12 | 6 | Human Adult Multiple Cortical Areas SMART-seq | Somatosensory_Cortex_(S1)-Non-neuronal-Endothe... | 3.174603 | [DOCK2, INPP5D, LRMDA, DOCK8, GPR34, LY86] | |
| ToppCell | Microglial | Endothelial | 0.0 | 189 | 12 | 6 | Human Adult Multiple Cortical Areas SMART-seq | Somatosensory_Cortex_(S1)-Non-neuronal-Endothe... | 3.174603 | [DOCK2, INPP5D, LRMDA, DOCK8, GPR34, LY86] | |
| ToppCell | Microglial | 0.0 | 198 | 12 | 6 | Integration of Four Brain Organoid Datasets an... | Fetal_brain-fetalBrain_Zhong_nature-GW26-Myelo... | 3.030303 | [DOCK2, INPP5D, DOCK8, GPR34, LY86, MERTK] | |
| ToppCell | Myeloid|GW26 | 0.0 | 198 | 12 | 6 | Integration of Four Brain Organoid Datasets an... | Fetal_brain-fetalBrain_Zhong_nature-GW26-Myelo... | 3.030303 | [DOCK2, INPP5D, DOCK8, GPR34, LY86, MERTK] | |
| ToppCell | Microglial | 0.0 | 127 | 12 | 5 | Mouse Adult Brain Overview (690k cells, 9 regi... | Posterior_cortex-Hematopoietic-MICROGLIA-MI2(C... | 3.937008 | [HEXB, INPP5D, DOCK8, GPR34, LY86] | |
| ToppCell | Microglial | 0.0 | 166 | 12 | 5 | Mouse Adult Brain Overview (690k cells, 9 regi... | Posterior_cortex-Hematopoietic-MICROGLIA-MI2(C... | 3.012048 | [HEXB, INPP5D, DOCK8, GPR34, LY86] | |
| ToppCell | Microglial | 0.0 | 166 | 12 | 5 | Mouse Adult Brain Overview (690k cells, 9 regi... | Posterior_cortex-Hematopoietic-MICROGLIA-MI2(C... | 3.012048 | [HEXB, INPP5D, DOCK8, GPR34, LY86] | |
| ToppCell | Microglial | 0.0 | 166 | 12 | 5 | Mouse Adult Brain Overview (690k cells, 9 regi... | Posterior_cortex-Hematopoietic-MICROGLIA-MI2(C... | 3.012048 | [HEXB, INPP5D, DOCK8, GPR34, LY86] | |
| ToppCell | Posterior_cortex-Hematopoietic|Posterior_cortex | 0.0 | 178 | 12 | 5 | Mouse Adult Brain Overview (690k cells, 9 regi... | Posterior_cortex-Hematopoietic|Posterior_corte... | 2.808989 | [DOCK2, HEXB, INPP5D, GPR34, LY86] |
Annotate with CellTypist¶
# %%time
# # To Aggregate More Specific Cell Types
# celltypist_rename = dict(
# Gabaergic=["GABA"], Glutamatergic=["Glut"],
# Dopaminergic=["Dopa"], Serotonergic=["Sero"],
# Inhibitory=["Inh"], Excitatory=["Exc"],
# Astrocyte=["Astro"],
# Microglia=["Microglia"],
# Pericyte=["peri"],
# Monocyte=["Monocyte"],
# Lymphoid=["Lymphoid"],
# Endothelial=["Endothelial"],
# Oligodendrocyte=[r"^(?=.*oligo)(?!.*poly)(?!.*opc).*"],
# Polydendrocyte=["OPC"])
# # Run CellTypist
# self.rna.X = self.rna.layers["counts"].copy()
# sc.pp.normalize_total(self.rna, target_sum=10000)
# sc.pp.log1p(self.rna) # copy=True: do not update adata.X
# predictions = self.annotate(
# model_celltypist, col_celltype=col_celltype, layer=None,
# col_celltype_new="", majority_voting=True, min_prop=0.5, use_GPU=True)
# if "majority_voting" in self.rna.obs:
# self.rna.obs.loc[:, "majority_voting_short"] = self.rna.obs[
# "majority_voting"].apply(lambda x: " ".join(x.split(
# " ")[1:]) if all((i in [str(i) for i in np.arange(
# 0, 10)] for i in x.split(" ")[
# 0])) else x) # drop pointless #s in front of cell types
# # Rename Cell Types
# rn_ct = predictions.predicted_labels.groupby("majority_voting").apply(
# lambda x: {x.name: " | ".join([
# j for j in celltypist_rename if any((re.search(
# i.lower(), x.name.lower()) for i in celltypist_rename[
# j]))])}).apply(lambda x: {list(x.keys())[0]: list(x.keys(
# ))[0]} if x[list(x.keys())[0] ] == "" else x).apply(
# lambda x: pd.Series(x)).stack().reset_index(
# 0, drop=True)
# if "annotation_majority_voting" in self.rna.obs:
# self.rna.obs = self.rna.obs.drop("annotation_majority_voting", axis=1)
# self.rna.obs = self.rna.obs.join(self.rna.obs.replace({
# "majority_voting": dict(rn_ct)})["majority_voting"].to_frame(
# "annotation_majority_voting"))
# self.rna.X = self.rna.layers["scaled"].copy()
Annotate with Map My Cells¶
Make sure to run the following bash commands after activating the conda environment used for this notebook.
Pull cell_type_mapper from GitHub (clone into your home directory):
cd && git clone git@github.com:AllenInstitute/cell_type_mapper.git
Navigate to that directory and run:
pip install .
Navigate to the folder containing this notebook.
Install ABC Atlas (while in same directory as this notebook):
pip install -U git+https://github.com/alleninstitute/abc_atlas_access >& scratch/junk.txt
Pull lookup files (while in same directory as this notebook):
cd resources
wget https://allen-brain-cell-atlas.s3-us-west-2.amazonaws.com/mapmycells/WMB-10X/20240831/mouse_markers_230821.json
wget https://allen-brain-cell-atlas.s3-us-west-2.amazonaws.com/mapmycells/WMB-10X/20240831/precomputed_stats_ABC_revision_230821.h5
Note: To use GPU + Torch, you may need to alter the line containing np.where(np.logical_not(np.isfinite(data)))[0] in the file "cell_type_mapper/src/cell_type_mapper/cell_by_gene/cell_by_gene.py" so that it reads instead:
try:
    nan_rows = np.where(
        np.logical_not(np.isfinite(data.cpu().numpy())))[0]
except Exception:
    nan_rows = np.where(np.logical_not(np.isfinite(data)))[0]
You may have to run the following code in this notebook:
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"
Also, in `_correlation_dot_gpu()` in distance_utils.py, change
`correlation = torch.matmul(arr0, arr1)` to
try:
    correlation = torch.matmul(arr0, arr1)
except RuntimeError as err:
    if "CUBLAS_STATUS_NOT_INITIALIZED" in str(err):
        arr0_cpu = arr0.cpu()
        arr1_cpu = arr1.cpu()
        correlation = torch.matmul(arr0_cpu, arr1_cpu).to(arr0.device)
    else:
        raise
These changes are meant to help manage processes/memory.
# %%time
# # Write File to Use as Input for Map My Cells
# if overwrite is True or not os.path.exists(file_new):
# os.makedirs("data", exist_ok=True)
# self.rna.X = self.rna.layers["counts"]
# self.rna.write_h5ad(file_new)
# else:
# raise ValueError("Must be able to overwrite to run Map My Cells.")
# # Run Map My Cells
# self.rna = scflow.pp.run_mapbraincells(
# file_new, map_my_cells_source=map_my_cells_source,
# dir_scratch="scratch", dir_resources="resources",
# validate_output_file="scratch/tmp.h5ad", # map_to_ensembl=True,
# map_my_cells_region_keys=map_my_cells_region_keys,
# map_my_cells_cell_keys=map_my_cells_cell_keys, verbose_stdout=True,
# n_processors=4, chunk_size=5000, max_gb=5)
# View Results
# _ = self.plot(kind="umap", color=["cellmap_class_name"])
# if "annotation_toppgene" in self.rna.obs:
# print(self.rna.obs[["cellmap_class_name", "annotation_toppgene"]
# ].value_counts().sort_index())
# self.rna.obs[[i for i in self.rna.obs if "cellmap" in i and "ori" not in i]]
Compare Annotations¶
# Annotation columns to compare, restricted to those present in `.obs`
cols = list(filter(
    lambda col: col in self.rna.obs,
    ("annotation_by_overlap", "annotation_toppgene",
     "annotation_majority_voting")))
# Uncomment to also include the Map My Cells labels when available
# cols += [i for i in [
#     "cellmap_class_name", "cellmap_subclass_name"] if i in self.rna.obs]

# One UMAP panel per annotation column
self.plot(kind="umap", color=cols, wspace=0.3)

# Count how often each combination of labels occurs (shown as cell output)
self.rna.obs[cols].value_counts().sort_index()
annotation_by_overlap annotation_toppgene
Neuron OPC 21527
1 19639
Excitatory 8753
5 19269
Oligodendrocyte Oligodendrocyte 18451
Endothelial Excitatory 2158
Astrocyte Astrocyte 7160
OPC OPC 3117
Microglial Microglial 3200
Name: count, dtype: int64
OSD-613-Specific¶
Clean variable names & keys
def _tag_broad_class(name):
    """Return *name* + '***' + every broad-class label whose keyword it has."""
    lowered = name.lower()
    labels = "".join(
        label for keyword, label in (
            ("glut", "Glutamatergic-"), ("gaba", "GABAergic-"),
            ("oligo", "Oligodendrocyte-"), ("chol", "Cholinergic-"))
        if keyword in lowered)
    return name + "***" + labels


def _prefer_broad_class(tagged):
    """Return the labels after '***', or the original name if none matched."""
    pieces = tagged.split("***")
    return pieces[0] if pieces[1] == "" else pieces[1]


def _drop_trailing_dash(label):
    """Strip the single trailing '-' left over from label concatenation."""
    return label[:-1] if label[-1] == "-" else label


# Age labels with units, plus a shorter name for the spaceflight factor
self.rna.obs.loc[:, "Age_Start"] = (
    self.rna.obs["Factor Value[Age]"].astype(str) + " Weeks")
self.rna.obs.loc[:, "Age_End"] = (
    self.rna.obs["Characteristics[Age at Euthanasia]"].astype(str)
    + " Weeks")
self.rna.obs.loc[:, "Condition"] = self.rna.obs["Factor Value[Spaceflight]"]

if "cellmap_class_name" in self.rna.obs:  # cell name w/o region cues
    # Same three passes as before: tag -> choose -> tidy, each one
    # written back to the collapsed column before the next pass reads it
    for _step, _source in (
            (_tag_broad_class, "cellmap_class_name"),
            (_prefer_broad_class, "cellmap_class_name_collapsed"),
            (_drop_trailing_dash, "cellmap_class_name_collapsed")):
        self.rna.obs.loc[:, "cellmap_class_name_collapsed"] = self.rna.obs[
            _source].apply(_step)
Test¶
Make sure preprocessing properly implemented
# for x in kws_pp:
# print(f"\n\n{'=' * 80}\n{x}\n{'=' * 80}\n")
# ann = self.rna[self.rna.obs[col_sample] == x]
# if join_method == "inner":
# assert all(ann.var["n_cells_by_counts"] >= kws_pp[
# x]["min_max_cells"][0])
# assert all(ann.var["n_cells_by_counts"] <= kws_pp[
# x]["min_max_cells"][1]) if kws_pp[x]["min_max_cells"][1] else True
# assert all(ann.obs["n_genes"] >= kws_pp[x]["min_max_genes"][0])
# assert all(ann.obs["n_genes"] <= kws_pp[x][
# "min_max_genes"][1]) if kws_pp[x]["min_max_genes"][1] else True
# assert all(ann.obs["pct_counts_mt"] <= kws_pp[x]["max_mt"])
# assert all(ann.obs["total_counts"] >= kws_pp[x]["min_max_counts"][0])
# assert all(ann.obs["total_counts"] <= kws_pp[x]["min_max_counts"][1]) if (
# kws_pp[x]["min_max_counts"][1]) else True
# print(kws_pp[x])
# print(ann.obs[["n_genes", "pct_counts_mt", "total_counts"]
# ].describe().loc[["min", "max"]])
# print(ann.var[["n_cells_by_counts"]].describe().loc[[
# "min", "max"]])
Final Write¶
# Descriptives: tabulate start age vs. age at euthanasia when both exist
_age_columns = ["Factor Value[Age]", "Characteristics[Age at Euthanasia]"]
_has_age_columns = True
for _column in _age_columns:
    if _column not in self.rna.obs:
        _has_age_columns = False
        break
if _has_age_columns:
    print(self.rna.obs[_age_columns].value_counts())
# Write h5ad
# Reset X to a copy of the "counts" layer before anything is written to disk
self.rna.X = self.rna.layers["counts"].copy()
# Only write when overwriting is enabled or the target file does not exist yet
if overwrite is True or not os.path.exists(file_new):
print("\n\n", f"Writing file to {file_new}...")
self.rna.write_h5ad(file_new)
# Write Version Compatible with Older Packages
# NOTE(review): indentation was lost in this export -- confirm whether this
# copy is meant to be written only when the main file above is written.
adata = self.rna.copy()
adata.uns = {}  # empty `.uns` on the copy (presumably what older packages cannot read -- TODO confirm)
# Saved next to the main file, with "_compatible" inserted before the extension
adata.write_h5ad(os.path.splitext(file_new)[0] + "_compatible.h5ad")
# Send Email with Output When Done
# Converts this notebook to HTML with nbconvert, then mails `html_out` as an
# attachment through mutt. Skipped unless both `email` and `html_out` are set.
if email is not None and html_out is not None:
    import subprocess

    # Argument lists (no shell) so file names containing spaces or shell
    # metacharacters are passed verbatim instead of being re-parsed.
    _notify_commands = (
        (["jupyter", "nbconvert", "--to", "html", str(cur_file)], None),
        (["mutt", "-s", "JOB DONE", "-a", str(html_out), "--",
          *str(email).split()], "yay\n"),  # whitespace-separated recipients
    )
    for _command, _stdin_text in _notify_commands:
        try:
            # check=False keeps this best-effort: a failing command does
            # not abort the notebook, matching the previous os.system calls
            subprocess.run(
                _command, input=_stdin_text, text=True, check=False)
        except OSError as err:  # e.g., executable not installed
            print(f"Could not run {_command[0]}: {err}")
Factor Value[Age] Characteristics[Age at Euthanasia] 29 37 55266 12 20 48008 Name: count, dtype: int64 Writing file to data/OSD-613_integrated.h5ad...
Scratch (IGNORE)¶
# # Integrate
# join_method = "inner"
# cct = "leiden_individual" if kws_cluster is not None else None
# kws_integrate = {"col_celltype": cct,
# "flavor": "scanvi",
# # "flavor": "scvi",
# # "flavor": "scanorama",
# # "flavor": "harmony",
# "kws_pp": None, "kws_cluster": None, "n_top_genes": 1000,
# "join": join_method, "merge": "same",
# "col_batch": None, # suppress using batch as covariate
# "drop_non_hvgs": False,
# "use_rapids": True,
# "fill_value": np.nan if join_method == "outer" else None,
# "out_file": file_new}
# self = scflow.Rna(sc.read(file_new), col_sample=col_sample,
# col_batch=col_batch, kws_integrate=kws_integrate)
# # Write Files for Processed/Integrated Objects?
# if overwrite is True or not os.path.exists(file_new):
# self.rna.write_h5ad(file_new)
# # Display
# print(self.rna)
# self.rna.obs